Blame view

arch/x86/crypto/twofish-avx-x86_64-asm_64.S 10.5 KB
1a59d1b8e   Thomas Gleixner   treewide: Replace...
1
  /* SPDX-License-Identifier: GPL-2.0-or-later */
107778b59   Johannes Goetzfried   crypto: twofish -...
2
3
4
5
6
7
  /*
   * Twofish Cipher 8-way parallel algorithm (AVX/x86_64)
   *
   * Copyright (C) 2012 Johannes Goetzfried
   *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
   *
   * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
   */
d3f5188df   Jussi Kivilinna   crypto: x86/twofi...
10
  #include <linux/linkage.h>
8691ccd76   Josh Poimboeuf   x86/asm/crypto: C...
11
  #include <asm/frame.h>
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
12
  #include "glue_helper-asm-avx.S"
107778b59   Johannes Goetzfried   crypto: twofish -...
13
  .file "twofish-avx-x86_64-asm_64.S"
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
14

e183914af   Denys Vlasenko   crypto: x86 - mak...
15
  /*
   * 16-byte constant listing the byte indices 15..0 in descending order.
   * It is passed to load_ctr_8way() by twofish_ctr_8way; presumably it is
   * used there as a byte-shuffle control that reverses a 128-bit value
   * (the macro lives in glue_helper-asm-avx.S -- confirm there).
   */
  .section	.rodata.cst16.bswap128_mask, "aM", @progbits, 16
  .align 16
  .Lbswap128_mask:
  	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
e183914af   Denys Vlasenko   crypto: x86 - mak...
19
20
21
  
  /*
   * 16-byte constant: low quadword = 0x87, high quadword = 1.
   * It is passed to load_xts_8way() by the XTS entry points.  Judging by
   * the label, it serves the GF(2^128) multiply / shift-left-by-one used
   * to advance the XTS tweak -- confirm against glue_helper-asm-avx.S.
   */
  .section	.rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
  .align 16
  .Lxts_gf128mul_and_shl1_mask:
  	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
24

107778b59   Johannes Goetzfried   crypto: twofish -...
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  .text
  
  /* structure of crypto context */
  /*
   * Byte offsets into the context pointed to by CTX:
   *   s0..s3 - four lookup tables, 1024 bytes apart; they are indexed by a
   *            single byte with a scale of 4 (256 entries of 32 bits each)
   *   w      - whitening keys; 16 bytes at w are XORed into the blocks on
   *            the way into encryption, 16 bytes at w+16 on the way out
   *            (decryption uses them in the opposite order)
   *   k      - round subkeys, 32 bits each; round n reads k[2n] and k[2n+1]
   * NOTE(review): these must match the C-side context layout -- confirm.
   */
  #define s0	0
  #define s1	1024
  #define s2	2048
  #define s3	3072
  #define w	4096
  #define k	4128
  
  /**********************************************************************
    8-way AVX twofish
   **********************************************************************/
  /*
   * Register roles for the 8-way code.
   *
   *   CTX        - cipher context pointer (first argument, %rdi)
   *   RA..RD 1/2 - the four 32-bit state words A..D; set 1 and set 2 each
   *                carry four blocks, one block per 32-bit lane
   *   RX0/RY0    - outputs of the two table-lookup passes for set 1
   *   RX1/RY1    - the same for set 2
   *   RK1/RK2    - whitening key, later the two broadcast round subkeys
   *   RT, RR     - vector scratch (RR is clobbered by rotate_1l)
   *   RID1/RID2  - table indices inside lookup_32bit.  RID1 is %r13, which
   *                is callee-saved, so the cipher cores push/pop it.  RID2
   *                is %rsi, so callers must move dst out of %rsi first.
   *   RGI1..RGI4 - 64-bit staging registers for bytes being looked up.
   *                They must be rdx/rcx/rax/rbx because lookup_32bit uses
   *                their low-byte and high-byte forms (%dl/%dh, ...).
   *   RGS1..RGS3 - accumulators for lookup results
   */
  #define CTX %rdi
  
  #define RA1 %xmm0
  #define RB1 %xmm1
  #define RC1 %xmm2
  #define RD1 %xmm3
  
  #define RA2 %xmm4
  #define RB2 %xmm5
  #define RC2 %xmm6
  #define RD2 %xmm7
  
  #define RX0 %xmm8
  #define RY0 %xmm9
  
  #define RX1 %xmm10
  #define RY1 %xmm11
  
  #define RK1 %xmm12
  #define RK2 %xmm13
  
  #define RT %xmm14
  #define RR %xmm15
  
  #define RID1  %r13
  #define RID1d %r13d
  #define RID2  %rsi
  #define RID2d %esi
  
  #define RGI1   %rdx
  #define RGI1bl %dl
  #define RGI1bh %dh
  #define RGI2   %rcx
  #define RGI2bl %cl
  #define RGI2bh %ch
  
  #define RGI3   %rax
  #define RGI3bl %al
  #define RGI3bh %ah
  #define RGI4   %rbx
  #define RGI4bl %bl
  #define RGI4bh %bh
  
  #define RGS1  %r8
  #define RGS1d %r8d
  #define RGS2  %r9
  #define RGS2d %r9d
  #define RGS3  %r10
  #define RGS3d %r10d
f94a73f8d   Jussi Kivilinna   crypto: twofish-a...
83
84
85
  /*
   * lookup_32bit - combine four table lookups for the low 32 bits of src.
   *
   *   t0..t3        context offsets of the tables used for bytes 0..3
   *   src           one of the RGI* aliases; the alias name itself is needed
   *                 because "src ## bl" / "src ## bh" are pasted to reach
   *                 the low-byte and high-byte registers
   *   dst           one of the RGS* aliases; the 32-bit form "dst ## d" is
   *                 written, which also clears the upper half of dst
   *   interleave_op operation issued between the lookups, applied to il_reg
   *                 (shr_next to move on to the next 32 bits, or dummy)
   *
   * Result: dst = t0[byte0] ^ t1[byte1] ^ t2[byte2] ^ t3[byte3].
   * src is shifted right by 16 (by 32 in total when interleave_op is
   * shr_next on the same register).  Clobbers RID1, RID2 and the flags.
   */
  #define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \
  	movzbl		src ## bl,        RID1d;     \
  	movzbl		src ## bh,        RID2d;     \
  	shrq $16,	src;                         \
  	movl		t0(CTX, RID1, 4), dst ## d;  \
  	movl		t1(CTX, RID2, 4), RID2d;     \
  	movzbl		src ## bl,        RID1d;     \
  	xorl		RID2d,            dst ## d;  \
  	movzbl		src ## bh,        RID2d;     \
  	interleave_op(il_reg);			     \
  	xorl		t2(CTX, RID1, 4), dst ## d;  \
  	xorl		t3(CTX, RID2, 4), dst ## d;
f94a73f8d   Jussi Kivilinna   crypto: twofish-a...
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
  /*
   * Expands to nothing.  Passed wherever a macro parameter expects an
   * operation (interleave_op, preload, prerotate) but none is wanted.
   */
  #define dummy(d) /* do nothing */
  
  /*
   * Shift reg right by 16 bits.  Used as lookup_32bit's interleave_op so
   * that, together with the macro's own 16-bit shift, the next 32-bit word
   * of the register ends up in the low bits.
   */
  #define shr_next(reg) \
  	shrq $16,	reg;
  
  /*
   * G - run lookup_32bit over all four 32-bit words held in gi1:gi2.
   *
   *   gi1, gi2  RGI* aliases holding the low and high quadword of a vector
   *   x         not referenced in the body
   *   t0..t3    table offsets handed on to lookup_32bit
   *
   * Result: RGS2 = lookup(gi1 high word) << 32 | lookup(gi1 low word)
   *         RGS3 = lookup(gi2 high word) << 32 | lookup(gi2 low word)
   * gi1 and gi2 are consumed (shifted); RGS1 is scratch.
   *
   * NOTE(review): the "##" in front of gi1/gi2 appears to be deliberate.
   * It keeps the argument from being macro-expanded before it reaches
   * lookup_32bit, which needs the alias name (e.g. RGI1) to build RGI1bl
   * and RGI1bh.  Confirm before "cleaning it up".
   */
  #define G(gi1, gi2, x, t0, t1, t2, t3) \
  	lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1);  \
  	lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2);  \
  	\
  	lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none);      \
  	shlq $32,	RGS2;                                        \
  	orq		RGS1, RGS2;                                  \
  	lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none);      \
  	shlq $32,	RGS1;                                        \
  	orq		RGS1, RGS3;
  
  /*
   * round_head_2 - table-lookup half of one round for both register sets.
   *
   *   a, b            register-name prefixes; "a ## 1", "b ## 1" form set 1
   *                   and "a ## 2", "b ## 2" form set 2
   *   x1, y1, x2, y2  vector outputs
   *
   * On entry RGI1:RGI2 must already hold a##1 (see preload_rgi).
   *
   *   x1 = lookups over a##1 with tables s0, s1, s2, s3
   *   y1 = lookups over b##1 with tables s1, s2, s3, s0
   *   x2, y2 = the same for a##2 and b##2
   *
   * Using the tables rotated by one position for b gives the same result
   * as applying the s0..s3 lookup to each 32-bit word of b rotated left by
   * 8 bits.  The next operand is moved into the RGI registers right after
   * each G() and before that G()'s result is moved into its vector.
   * Clobbers RGI1..RGI4, RGS1..RGS3, RID1, RID2 and the flags.
   */
  #define round_head_2(a, b, x1, y1, x2, y2) \
  	vmovq		b ## 1, RGI3;           \
  	vpextrq $1,	b ## 1, RGI4;           \
  	\
  	G(RGI1, RGI2, x1, s0, s1, s2, s3);      \
  	vmovq		a ## 2, RGI1;           \
  	vpextrq $1,	a ## 2, RGI2;           \
  	vmovq		RGS2, x1;               \
  	vpinsrq $1,	RGS3, x1, x1;           \
  	\
  	G(RGI3, RGI4, y1, s1, s2, s3, s0);      \
  	vmovq		b ## 2, RGI3;           \
  	vpextrq $1,	b ## 2, RGI4;           \
  	vmovq		RGS2, y1;               \
  	vpinsrq $1,	RGS3, y1, y1;           \
  	\
  	G(RGI1, RGI2, x2, s0, s1, s2, s3);      \
  	vmovq		RGS2, x2;               \
  	vpinsrq $1,	RGS3, x2, x2;           \
  	\
  	G(RGI3, RGI4, y2, s1, s2, s3, s0);      \
  	vmovq		RGS2, y2;               \
  	vpinsrq $1,	RGS3, y2, y2;
107778b59   Johannes Goetzfried   crypto: twofish -...
134

f94a73f8d   Jussi Kivilinna   crypto: twofish-a...
135
  /*
   * encround_tail - arithmetic half of one encryption round for one set.
   *
   *   a, b       this round's input words (only b is touched, by prerotate)
   *   c, d       words updated by this round
   *   x, y       lookup results from round_head_2 (both are overwritten)
   *   prerotate  rotate_1l, or dummy in the final round
   *
   * Per 32-bit lane:
   *   c = ror32(c ^ (x + y + RK1), 1)
   *   d = d ^ (x + 2*y + RK2)
   * d is expected to arrive already rotated left by one bit; prerotate(b)
   * applies that rotation to b, which takes the "d" role in the next round.
   * Clobbers RT (and RR when prerotate is rotate_1l).
   *
   * The last body line carries no trailing backslash, so the macro cannot
   * swallow whatever line follows it.
   */
  #define encround_tail(a, b, c, d, x, y, prerotate) \
  	vpaddd			x, y,   x; \
  	vpaddd			x, RK1, RT;\
  	prerotate(b);			   \
  	vpxor			RT, c,  c; \
  	vpaddd			y, x,   y; \
  	vpaddd			y, RK2, y; \
  	vpsrld $1,		c, RT;     \
  	vpslld $(32 - 1),	c, c;      \
  	vpor			c, RT,  c; \
  	vpxor			d, y,   d;
  
  /*
   * decround_tail - arithmetic half of one decryption round for one set.
   *
   *   a, b       this round's input words (only a is touched, by prerotate)
   *   c, d       words updated by this round
   *   x, y       lookup results from round_head_2 (both are overwritten)
   *   prerotate  rotate_1l, or dummy in the final round
   *
   * Per 32-bit lane:
   *   c = c ^ (x + y + RK1)
   *   d = ror32(d ^ (x + 2*y + RK2), 1)
   * c is expected to arrive already rotated left by one bit; prerotate(a)
   * applies that rotation to a, which takes the "c" role in the next round.
   * Clobbers RT (and RR when prerotate is rotate_1l).
   *
   * The last body line carries no trailing backslash, so the macro cannot
   * swallow whatever line follows it.
   */
  #define decround_tail(a, b, c, d, x, y, prerotate) \
  	vpaddd			x, y,   x; \
  	vpaddd			x, RK1, RT;\
  	prerotate(a);			   \
  	vpxor			RT, c,  c; \
  	vpaddd			y, x,   y; \
  	vpaddd			y, RK2, y; \
  	vpxor			d, y,   d; \
  	vpsrld $1,		d, y;      \
  	vpslld $(32 - 1),	d, d;      \
  	vpor			d, y,   d;
f94a73f8d   Jussi Kivilinna   crypto: twofish-a...
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
  
  /*
   * Rotate every 32-bit lane of x left by one bit.  Clobbers RR.
   */
  #define rotate_1l(x) \
  	vpslld $1,		x, RR;     \
  	vpsrld $(32 - 1),	x, x;      \
  	vpor			x, RR,  x;
  
  /*
   * Copy vector c into RGI1 (low quadword) and RGI2 (high quadword) so the
   * next round_head_2 can start its lookups without a vector-to-GPR move.
   */
  #define preload_rgi(c) \
  	vmovq			c, RGI1; \
  	vpextrq $1,		c, RGI2;
  
  /*
   * encrypt_round - one full encryption round over both register sets.
   *
   *   n           round number; selects subkeys k[2n] and k[2n+1], which are
   *               broadcast to every lane of RK1 and RK2
   *   a, b, c, d  register-name prefixes (RA/RB/RC/RD in some order)
   *   preload     preload_rgi, or dummy for the final round
   *   prerotate   rotate_1l, or dummy for the final round
   *
   * preload(c ## 1) is issued right after set 1's tail has finished c##1,
   * and before set 2's tail; c##1 is the word the next round looks up first.
   */
  #define encrypt_round(n, a, b, c, d, preload, prerotate) \
  	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;                  \
  	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;                  \
  	round_head_2(a, b, RX0, RY0, RX1, RY1);                  \
  	encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \
  	preload(c ## 1);                                         \
  	encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate);
  
  /*
   * decrypt_round - one full decryption round over both register sets.
   *
   * Same shape and parameters as encrypt_round, but the arithmetic half is
   * decround_tail.  n selects subkeys k[2n] and k[2n+1]; preload/prerotate
   * are preload_rgi/rotate_1l, or dummy for the final round.
   */
  #define decrypt_round(n, a, b, c, d, preload, prerotate) \
  	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;                  \
  	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;                  \
  	round_head_2(a, b, RX0, RY0, RX1, RY1);                  \
  	decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \
  	preload(c ## 1);                                         \
  	decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate);
107778b59   Johannes Goetzfried   crypto: twofish -...
183
184
  
  /*
   * encrypt_cycle - two consecutive encryption rounds (2n and 2n+1).
   * The second round swaps the roles of the (A,B) and (C,D) register
   * pairs, so after a full cycle every register is back in its own role.
   */
  #define encrypt_cycle(n) \
  	encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \
  	encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l);
  
  /*
   * Final encryption cycle: like encrypt_cycle, except the second round
   * passes dummy for preload and prerotate because no round follows it.
   */
  #define encrypt_cycle_last(n) \
  	encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \
  	encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy);
107778b59   Johannes Goetzfried   crypto: twofish -...
191
192
  
  /*
   * decrypt_cycle - two consecutive decryption rounds, in reverse order:
   * round 2n+1 first (with the (C,D) pair as input), then round 2n.
   */
  #define decrypt_cycle(n) \
  	decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \
  	decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l);
107778b59   Johannes Goetzfried   crypto: twofish -...
195

f94a73f8d   Jussi Kivilinna   crypto: twofish-a...
196
197
198
  /*
   * Final decryption cycle: like decrypt_cycle, except the second round
   * passes dummy for preload and prerotate because no round follows it.
   */
  #define decrypt_cycle_last(n) \
  	decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \
  	decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy);
107778b59   Johannes Goetzfried   crypto: twofish -...
199
200
201
202
203
204
205
206
207
208
209
  
  /*
   * transpose_4x4 - transpose a 4x4 matrix of 32-bit words in place.
   *
   *   x0..x3  the four rows on entry; on exit xN holds word N of each of
   *           the four input registers (in input order)
   *   t0..t2  vector scratch registers, clobbered
   */
  #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
  	vpunpckldq		x1, x0, t0; \
  	vpunpckhdq		x1, x0, t2; \
  	vpunpckldq		x3, x2, t1; \
  	vpunpckhdq		x3, x2, x3; \
  	\
  	vpunpcklqdq		t1, t0, x0; \
  	vpunpckhqdq		t1, t0, x1; \
  	vpunpcklqdq		x3, t2, x2; \
  	vpunpckhqdq		x3, t2, x3;
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
210
211
212
213
214
  /*
   * inpack_blocks - whiten four loaded blocks and switch to word-sliced form.
   *
   *   x0..x3  one 128-bit block each on entry; after the transpose each
   *           register holds the same 32-bit word of all four blocks
   *   wkey    128-bit whitening key, XORed into every block first
   *   t0..t2  vector scratch registers, clobbered
   */
  #define inpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \
  	vpxor		x0, wkey, x0; \
  	vpxor		x1, wkey, x1; \
  	vpxor		x2, wkey, x2; \
  	vpxor		x3, wkey, x3; \
  	\
  	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
217
  /*
   * outunpack_blocks - inverse of inpack_blocks: transpose the word-sliced
   * registers back to one block per register, then XOR each block with the
   * 128-bit whitening key in wkey.  t0..t2 are vector scratch, clobbered.
   */
  #define outunpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \
  	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
  	\
  	vpxor		x0, wkey, x0; \
  	vpxor		x1, wkey, x1; \
  	vpxor		x2, wkey, x2; \
  	vpxor		x3, wkey, x3;
107778b59   Johannes Goetzfried   crypto: twofish -...
224
225
  
  /*
   * __twofish_enc_blk8 - encrypt eight blocks held in registers.
   *
   * File-local helper with a register-based interface (see the input/output
   * comment below).  It runs 16 rounds as eight two-round cycles, with the
   * whitening key at w(CTX) applied on the way in and the one at
   * (w+16)(CTX) on the way out.
   *
   * %r13, %rbx and %rcx are saved and restored here.  Still clobbered for
   * the caller: %rax, %rdx, %rsi, %r8-%r10, the flags and %xmm8-%xmm15.
   */
  .align 8
  SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
  	/* input:
  	 *	%rdi: ctx, CTX
  	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
  	 * output:
  	 *	RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
  	 */
  
  	/* input whitening key */
  	vmovdqu w(CTX), RK1;
  
  	pushq %r13;
  	pushq %rbx;
  	pushq %rcx;
  
  	inpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
  	/* the first round expects A of set 1 in RGI1:RGI2 and D pre-rotated */
  	preload_rgi(RA1);
  	rotate_1l(RD1);
  	inpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
  	rotate_1l(RD2);
  
  	encrypt_cycle(0);
  	encrypt_cycle(1);
  	encrypt_cycle(2);
  	encrypt_cycle(3);
  	encrypt_cycle(4);
  	encrypt_cycle(5);
  	encrypt_cycle(6);
  	encrypt_cycle_last(7);
  
  	/* output whitening key */
  	vmovdqu (w+4*4)(CTX), RK1;
  
  	popq %rcx;
  	popq %rbx;
  	popq %r13;
  
  	outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
  	outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
  
  	ret;
  SYM_FUNC_END(__twofish_enc_blk8)
107778b59   Johannes Goetzfried   crypto: twofish -...
263
264
  
  /*
   * __twofish_dec_blk8 - decrypt eight blocks held in registers.
   *
   * File-local helper with a register-based interface (see the input/output
   * comment below).  It mirrors __twofish_enc_blk8: the whitening key at
   * (w+16)(CTX) is applied on the way in, the cycles run from 7 down to 0,
   * and the key at w(CTX) is applied on the way out.
   *
   * %r13 and %rbx are saved and restored here.  Unlike the encrypt helper,
   * %rcx is NOT preserved.  Also clobbered: %rax, %rdx, %rsi, %r8-%r10,
   * the flags and %xmm8-%xmm15.
   */
  .align 8
  SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
  	/* input:
  	 *	%rdi: ctx, CTX
  	 *	RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
  	 * output:
  	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
  	 */
  
  	/* input whitening key (the encryption-side output key) */
  	vmovdqu (w+4*4)(CTX), RK1;
  
  	pushq %r13;
  	pushq %rbx;
  
  	inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
  	/* the first round expects C of set 1 in RGI1:RGI2 and A pre-rotated */
  	preload_rgi(RC1);
  	rotate_1l(RA1);
  	inpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
  	rotate_1l(RA2);
  
  	decrypt_cycle(7);
  	decrypt_cycle(6);
  	decrypt_cycle(5);
  	decrypt_cycle(4);
  	decrypt_cycle(3);
  	decrypt_cycle(2);
  	decrypt_cycle(1);
  	decrypt_cycle_last(0);
  
  	/* output whitening key (the encryption-side input key) */
  	vmovdqu (w)(CTX), RK1;
  
  	popq %rbx;
  	popq %r13;
  
  	outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
  	outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
  
  	ret;
  SYM_FUNC_END(__twofish_dec_blk8)
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
300

6dcc5627f   Jiri Slaby   x86/asm: Change a...
301
  /*
   * twofish_ecb_enc_8way - encrypt eight blocks from src into dst.
   *
   * load_8way/store_8way come from glue_helper-asm-avx.S; they are assumed
   * to load/store eight consecutive 16-byte blocks -- confirm there.
   */
  SYM_FUNC_START(twofish_ecb_enc_8way)
  	/* input:
  	 *	%rdi: ctx, CTX
  	 *	%rsi: dst
  	 *	%rdx: src
  	 */
  	FRAME_BEGIN
  
  	/* %rsi doubles as RID2 and is clobbered by the cipher core */
  	movq %rsi, %r11;
  
  	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
  
  	call __twofish_enc_blk8;
  
  	/* the core returns its result in C, D, A, B register order */
  	store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
  
  	FRAME_END
  	ret;
  SYM_FUNC_END(twofish_ecb_enc_8way)
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
319

6dcc5627f   Jiri Slaby   x86/asm: Change a...
320
  /*
   * twofish_ecb_dec_8way - decrypt eight blocks from src into dst.
   *
   * load_8way/store_8way come from glue_helper-asm-avx.S; they are assumed
   * to load/store eight consecutive 16-byte blocks -- confirm there.
   */
  SYM_FUNC_START(twofish_ecb_dec_8way)
  	/* input:
  	 *	%rdi: ctx, CTX
  	 *	%rsi: dst
  	 *	%rdx: src
  	 */
  	FRAME_BEGIN
  
  	/* %rsi doubles as RID2 and is clobbered by the cipher core */
  	movq %rsi, %r11;
  
  	/* the decrypt core takes its input in C, D, A, B register order */
  	load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
  
  	call __twofish_dec_blk8;
  
  	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
  
  	FRAME_END
  	ret;
  SYM_FUNC_END(twofish_ecb_dec_8way)
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
338

6dcc5627f   Jiri Slaby   x86/asm: Change a...
339
  /*
   * twofish_cbc_dec_8way - CBC-decrypt eight blocks from src into dst.
   *
   * The blocks are decrypted with the 8-way core and then handed, together
   * with the src and dst pointers, to store_cbc_8way
   * (glue_helper-asm-avx.S).  That macro is assumed to apply the CBC
   * chaining XOR against the ciphertext still in src -- confirm there.
   */
  SYM_FUNC_START(twofish_cbc_dec_8way)
  	/* input:
  	 *	%rdi: ctx, CTX
  	 *	%rsi: dst
  	 *	%rdx: src
  	 */
  	FRAME_BEGIN
  
  	/* %r12 is callee-saved; it keeps src alive across the call */
  	pushq %r12;
  
  	/* %rsi (RID2) and %rdx (RGI1) are both clobbered by the cipher core */
  	movq %rsi, %r11;
  	movq %rdx, %r12;
  
  	load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
  
  	call __twofish_dec_blk8;
  
  	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
  
  	popq %r12;
  
  	FRAME_END
  	ret;
  SYM_FUNC_END(twofish_cbc_dec_8way)
8435a3c30   Jussi Kivilinna   crypto: twofish/a...
362

6dcc5627f   Jiri Slaby   x86/asm: Change a...
363
  /*
   * twofish_ctr_8way - CTR-mode processing of eight blocks.
   *
   * load_ctr_8way (glue_helper-asm-avx.S) fills the eight block registers
   * from the IV at %rcx using .Lbswap128_mask; the registers are encrypted
   * and store_ctr_8way combines them with src into dst.  The exact counter
   * handling lives in those macros -- confirm there.
   */
  SYM_FUNC_START(twofish_ctr_8way)
  	/* input:
  	 *	%rdi: ctx, CTX
  	 *	%rsi: dst
  	 *	%rdx: src
  	 *	%rcx: iv (little endian, 128bit)
  	 */
  	FRAME_BEGIN
  
  	/* %r12 is callee-saved; it keeps src alive across the call */
  	pushq %r12;
  
  	/* %rsi (RID2) and %rdx (RGI1) are both clobbered by the cipher core */
  	movq %rsi, %r11;
  	movq %rdx, %r12;
  
  	load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
  		      RD2, RX0, RX1, RY0);
  
  	call __twofish_enc_blk8;
  
  	store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
  
  	popq %r12;
  
  	FRAME_END
  	ret;
  SYM_FUNC_END(twofish_ctr_8way)
18be45270   Jussi Kivilinna   crypto: x86/twofi...
389

6dcc5627f   Jiri Slaby   x86/asm: Change a...
390
  /*
   * twofish_xts_enc_8way - XTS-encrypt eight blocks from src into dst.
   *
   * load_xts_8way/store_xts_8way come from glue_helper-asm-avx.S.  Per the
   * inline comments, the per-block tweaks are parked in dst before the
   * cipher call and XORed back in when the result is stored.
   */
  SYM_FUNC_START(twofish_xts_enc_8way)
  	/* input:
  	 *	%rdi: ctx, CTX
  	 *	%rsi: dst
  	 *	%rdx: src
  	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  	 */
  	FRAME_BEGIN
  
  	/* %rsi doubles as RID2 and is clobbered by the cipher core */
  	movq %rsi, %r11;
  
  	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
  	load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
  		      RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
  
  	call __twofish_enc_blk8;
  
  	/* dst <= regs xor IVs(in dst) */
  	store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
  
  	FRAME_END
  	ret;
  SYM_FUNC_END(twofish_xts_enc_8way)
18be45270   Jussi Kivilinna   crypto: x86/twofi...
412

6dcc5627f   Jiri Slaby   x86/asm: Change a...
413
  /*
   * twofish_xts_dec_8way - XTS-decrypt eight blocks from src into dst.
   *
   * Same flow as twofish_xts_enc_8way, but the blocks are loaded in the
   * C, D, A, B register order expected by __twofish_dec_blk8 and stored
   * from A, B, C, D.  %rcx is clobbered by the decrypt core; it is only
   * read here before the call.
   */
  SYM_FUNC_START(twofish_xts_dec_8way)
  	/* input:
  	 *	%rdi: ctx, CTX
  	 *	%rsi: dst
  	 *	%rdx: src
  	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  	 */
  	FRAME_BEGIN
  
  	/* %rsi doubles as RID2 and is clobbered by the cipher core */
  	movq %rsi, %r11;
  
  	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
  	load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2,
  		      RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
  
  	call __twofish_dec_blk8;
  
  	/* dst <= regs xor IVs(in dst) */
  	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
  
  	FRAME_END
  	ret;
  SYM_FUNC_END(twofish_xts_dec_8way)