Text file
src/crypto/aes/asm_ppc64le.s
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Based on CRYPTOGAMS code with the following comment:
6 // # ====================================================================
7 // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
8 // # project. The module is, however, dual licensed under OpenSSL and
9 // # CRYPTOGAMS licenses depending on where you obtain it. For further
10 // # details see http://www.openssl.org/~appro/cryptogams/.
11 // # ====================================================================
12
13 // Original code can be found at the link below:
14 // https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl
15
16 // Some function names were changed to be consistent with Go function
17 // names. For instance, the functions aes_p8_set_{en,de}crypt_key became
18 // set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm into two parts,
19 // creating a new section (doEncryptKeyAsm). This was necessary to avoid
20 // overwriting the arguments when setDecryptKeyAsm calls setEncryptKeyAsm.
21 // There were other modifications as well, but the functionality was kept the same.
22
23 #include "textflag.h"
24
25 // Register aliases for set{En,De}cryptKeyAsm and doEncryptKeyAsm: general-purpose roles first (R3-R8, R19), then vector roles (V0-V11)
26 #define INP R3
27 #define BITS R4
28 #define OUT R5
29 #define PTR R6
30 #define CNT R7
31 #define ROUNDS R8
32 #define TEMP R19
33 #define ZERO V0
34 #define IN0 V1
35 #define IN1 V2
36 #define KEY V3
37 #define RCON V4
38 #define MASK V5
39 #define TMP V6
40 #define STAGE V7
41 #define OUTPERM V8
42 #define OUTMASK V9
43 #define OUTHEAD V10
44 #define OUTTAIL V11
45
46 // Register aliases for {en,de}cryptBlockAsm; they name R3-R7, the same registers the key-setup aliases use under other names
47 #define BLK_INP R3
48 #define BLK_OUT R4
49 #define BLK_KEY R5
50 #define BLK_ROUNDS R6
51 #define BLK_IDX R7
52
53 DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON: first 16 bytes, loaded into RCON at the start of doEncryptKeyAsm
54 DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
55 DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000 // second RCON vector, reloaded by the 128-bit path after its 8-iteration loop
56 DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
57 DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK: permute control loaded from offset 0x20
58 DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
59 DATA ·rcon+0x30(SB)/8, $0x0000000000000000 // zero tail; no load of this offset is visible in this file -- presumably padding
60 DATA ·rcon+0x38(SB)/8, $0x0000000000000000
61 GLOBL ·rcon(SB), RODATA, $64 // 64-byte read-only table
62
63 // func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
64 TEXT ·setEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
65 // Load the three stack arguments into INP, BITS and OUT, then jump to doEncryptKeyAsm, which writes the int result to ret+24(FP) and returns
66 MOVD key+0(FP), INP
67 MOVD keylen+8(FP), BITS
68 MOVD enc+16(FP), OUT
69 JMP ·doEncryptKeyAsm(SB)
70
71 // Shared key-expansion body used by both setEncryptKeyAsm (via JMP) and setDecryptKeyAsm (via CALL). Expects INP, BITS and OUT to be loaded already; writes the exit code (0, -1 or -2) to ret+24(FP) and leaves it in INP (R3).
72 TEXT ·doEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
73 // Do not change R10 since it's storing the LR value in setDecryptKeyAsm
74
75 // Check arguments: exit code -1 for a nil key or output pointer, -2 for a key size below 128, above 256 or not a multiple of 64
76 MOVD $-1, PTR // li 6,-1 exit code to -1 (255)
77 CMPU INP, $0 // cmpldi r3,0 input key pointer set?
78 BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
79 CMPU OUT, $0 // cmpldi r5,0 output key pointer set?
80 BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
81 MOVD $-2, PTR // li 6,-2 exit code to -2 (254)
82 CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
83 BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort
84 CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256
85 BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort
86 ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64 (the result lands in TEMP, i.e. R19, not r0 as the mnemonic shows)
87 BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort
88
89 MOVD $·rcon(SB), PTR // PTR point to rcon addr
90
91 // Get key from memory and write aligned into VR
92 NEG INP, R9 // neg 9,3 R9 is ~INP + 1
93 LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0
94 ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr
95 LVSR (R9)(R0), KEY // lvsr 3,0,9
96 MOVD $0x20, R8 // li 8,0x20 R8 = 32, the offset of MASK inside rcon
97 CMPW BITS, $192 // cmpwi 4,192 Key size == 192? (result is consumed by the BLT/BEQ below)
98 LVX (INP)(R0), IN1 // lvx 2,0,3
99 VSPLTISB $0x0f, MASK// vspltisb 5,0x0f 0x0f0f0f0f... mask
100 LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
101 VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap
102 LVX (PTR)(R8), MASK // lvx 5,8,6 Load MASK from rcon+0x20
103 ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
104 VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align
105 MOVD $8, CNT // li 7,8 CNT = 8
106 VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
107 MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)
108
109 LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5
110 VSPLTISB $-1, OUTMASK // vspltisb 9,-1
111 LVX (OUT)(R0), OUTHEAD // lvx 10,0,5
112 VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8
113
114 BLT loop128 // blt .Loop128 BITS < 192, i.e. the 128-bit key path
115 ADD $8, INP, INP // addi 3,3,8
116 BEQ l192 // beq .L192 BITS == 192
117 ADD $8, INP, INP // addi 3,3,8
118 JMP l256 // b .L256 remaining case: 256-bit key
119
120 loop128:
121 // Key schedule (Round 1 to 8); CTR was set to 8 above, one store to OUT per iteration
122 VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
123 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
124 VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
125 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
126 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
127 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
128 STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output
129 ADD $16, OUT, OUT // addi 5,5,16 Point to the next round
130
131 VXOR IN0, TMP, IN0 // vxor 1,1,6
132 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
133 VXOR IN0, TMP, IN0 // vxor 1,1,6
134 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
135 VXOR IN0, TMP, IN0 // vxor 1,1,6
136 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 RCON = RCON + RCON (doubles each word)
137 VXOR IN0, KEY, IN0 // vxor 1,1,3
138 BC 0x10, 0, loop128 // bdnz .Loop128
139
140 LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys
141
142 // Key schedule (Round 9)
143 VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
144 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
145 VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
146 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
147 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
148 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
149 STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9
150 ADD $16, OUT, OUT // addi 5,5,16
151
152 // Key schedule (Round 10)
153 VXOR IN0, TMP, IN0 // vxor 1,1,6
154 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
155 VXOR IN0, TMP, IN0 // vxor 1,1,6
156 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
157 VXOR IN0, TMP, IN0 // vxor 1,1,6
158 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
159 VXOR IN0, KEY, IN0 // vxor 1,1,3
160
161 VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
162 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
163 VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
164 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
165 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
166 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
167 STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10
168 ADD $16, OUT, OUT // addi 5,5,16
169
170 // Key schedule (Round 11)
171 VXOR IN0, TMP, IN0 // vxor 1,1,6
172 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
173 VXOR IN0, TMP, IN0 // vxor 1,1,6
174 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
175 VXOR IN0, TMP, IN0 // vxor 1,1,6
176 VXOR IN0, KEY, IN0 // vxor 1,1,3
177 VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
178 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
179 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
180 STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11
181
182 ADD $15, OUT, INP // addi 3,5,15 INP = address used by the final merge-store at done
183 ADD $0x50, OUT, OUT // addi 5,5,0x50 OUT = address where done stores the round count
184
185 MOVD $10, ROUNDS // li 8,10 round count recorded for a 128-bit key
186 JMP done // b .Ldone
187
188 l192:
189 LVX (INP)(R0), TMP // lvx 6,0,3
190 MOVD $4, CNT // li 7,4 loop192 runs 4 times
191 VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
192 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
193 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
194 STVX STAGE, (OUT+R0) // stvx 7,0,5
195 ADD $16, OUT, OUT // addi 5,5,16
196 VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
197 VSPLTISB $8, KEY // vspltisb 3,8
198 MOVD CNT, CTR // mtctr 7
199 VSUBUBM MASK, KEY, MASK // vsububm 5,5,3 subtract 8 from every byte of MASK
200
201 loop192:
202 VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
203 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
204 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
205
206 VXOR IN0, TMP, IN0 // vxor 1,1,6
207 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
208 VXOR IN0, TMP, IN0 // vxor 1,1,6
209 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
210 VXOR IN0, TMP, IN0 // vxor 1,1,6
211
212 VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8
213 VSPLTW $3, IN0, TMP // vspltw 6,1,3
214 VXOR TMP, IN1, TMP // vxor 6,6,2
215 VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
216 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
217 VXOR IN1, TMP, IN1 // vxor 2,2,6
218 VXOR IN0, KEY, IN0 // vxor 1,1,3
219 VXOR IN1, KEY, IN1 // vxor 2,2,3
220 VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8
221
222 VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
223 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
224 VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
225 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
226 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
227 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
228 STVX STAGE, (OUT+R0) // stvx 7,0,5
229 ADD $16, OUT, OUT // addi 5,5,16
230
231 VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8
232 VXOR IN0, TMP, IN0 // vxor 1,1,6
233 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
234 VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
235 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
236 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
237 VXOR IN0, TMP, IN0 // vxor 1,1,6
238 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
239 VXOR IN0, TMP, IN0 // vxor 1,1,6
240 STVX STAGE, (OUT+R0) // stvx 7,0,5
241 ADD $16, OUT, OUT // addi 5,5,16
242
243 VSPLTW $3, IN0, TMP // vspltw 6,1,3
244 VXOR TMP, IN1, TMP // vxor 6,6,2
245 VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
246 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
247 VXOR IN1, TMP, IN1 // vxor 2,2,6
248 VXOR IN0, KEY, IN0 // vxor 1,1,3
249 VXOR IN1, KEY, IN1 // vxor 2,2,3
250 VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
251 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
252 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
253 STVX STAGE, (OUT+R0) // stvx 7,0,5
254 ADD $15, OUT, INP // addi 3,5,15 INP tracks the address for the final merge-store at done
255 ADD $16, OUT, OUT // addi 5,5,16
256 BC 0x10, 0, loop192 // bdnz .Loop192
257
258 MOVD $12, ROUNDS // li 8,12 round count recorded for a 192-bit key
259 ADD $0x20, OUT, OUT // addi 5,5,0x20 OUT = address where done stores the round count
260 BR done // b .Ldone
261
262 l256:
263 LVX (INP)(R0), TMP // lvx 6,0,3
264 MOVD $7, CNT // li 7,7 loop256 exits through bdz on its 7th pass
265 MOVD $14, ROUNDS // li 8,14 round count recorded for a 256-bit key
266 VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
267 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
268 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
269 STVX STAGE, (OUT+R0) // stvx 7,0,5
270 ADD $16, OUT, OUT // addi 5,5,16
271 VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
272 MOVD CNT, CTR // mtctr 7
273
274 loop256:
275 VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
276 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
277 VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8
278 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
279 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
280 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
281 STVX STAGE, (OUT+R0) // stvx 7,0,5
282 ADD $16, OUT, OUT // addi 5,5,16
283
284 VXOR IN0, TMP, IN0 // vxor 1,1,6
285 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
286 VXOR IN0, TMP, IN0 // vxor 1,1,6
287 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
288 VXOR IN0, TMP, IN0 // vxor 1,1,6
289 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
290 VXOR IN0, KEY, IN0 // vxor 1,1,3
291 VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
292 VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
293 VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
294 STVX STAGE, (OUT+R0) // stvx 7,0,5
295 ADD $15, OUT, INP // addi 3,5,15 INP tracks the address for the final merge-store at done
296 ADD $16, OUT, OUT // addi 5,5,16
297 BC 0x12, 0, done // bdz .Ldone decrement CTR and leave the loop once it reaches zero
298
299 VSPLTW $3, IN0, KEY // vspltw 3,1,3
300 VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12
301 VSBOX KEY, KEY // vsbox 3,3
302
303 VXOR IN1, TMP, IN1 // vxor 2,2,6
304 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
305 VXOR IN1, TMP, IN1 // vxor 2,2,6
306 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
307 VXOR IN1, TMP, IN1 // vxor 2,2,6
308
309 VXOR IN1, KEY, IN1 // vxor 2,2,3
310 JMP loop256 // b .Loop256
311
312 done:
313 LVX (INP)(R0), IN1 // lvx 2,0,3 reload the vector currently in memory at INP
314 VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9 merge the pending OUTHEAD bytes with it under OUTMASK
315 STVX IN1, (INP+R0) // stvx 2,0,3 write the merged vector back
316 MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0)
317 MOVW ROUNDS, 0(OUT) // stw 8,0(5) store the round count (10, 12 or 14) as a 32-bit word at OUT
318
319 enc_key_abort:
320 MOVD PTR, INP // mr 3,6 set exit code with PTR value
321 MOVD INP, ret+24(FP) // Put return value into the FP
322 RET // blr
323
324 // func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
325 TEXT ·setDecryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
326 // Load the arguments inside the registers, run doEncryptKeyAsm on them, then reverse the order of the 16-byte entries it wrote; returns doEncryptKeyAsm's exit code on failure, 0 on success
327 MOVD key+0(FP), INP
328 MOVD keylen+8(FP), BITS
329 MOVD dec+16(FP), OUT
330
331 MOVD LR, R10 // mflr 10 save LR in R10 across the CALL (this function is NOFRAME)
332 CALL ·doEncryptKeyAsm(SB)
333 MOVD R10, LR // mtlr 10
334
335 CMPW INP, $0 // cmpwi 3,0 exit 0 = ok (INP holds doEncryptKeyAsm's exit code)
336 BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort
337
338 // doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
339 SLW $4, ROUNDS, CNT // slwi 7,8,4 CNT = ROUNDS * 16
340 SUB $240, OUT, INP // subi 3,5,240 INP = OUT - 240
341 SRW $1, ROUNDS, ROUNDS // srwi 8,8,1 ROUNDS = ROUNDS / 2, the number of swaps
342 ADD R7, INP, OUT // add 5,3,7 OUT = INP + CNT (R7 is CNT)
343 MOVD ROUNDS, CTR // mtctr 8
344
345 // dec_key inverts the key sequence for decryption: each pass swaps the 16 bytes at INP with the 16 bytes at OUT, then moves INP forward and OUT backward by 16
346 dec_key:
347 MOVWZ 0(INP), TEMP // lwz 0, 0(3) (TEMP is R19 here, not r0 as the mnemonic shows)
348 MOVWZ 4(INP), R6 // lwz 6, 4(3)
349 MOVWZ 8(INP), R7 // lwz 7, 8(3)
350 MOVWZ 12(INP), R8 // lwz 8, 12(3)
351 ADD $16, INP, INP // addi 3,3,16
352 MOVWZ 0(OUT), R9 // lwz 9, 0(5)
353 MOVWZ 4(OUT), R10 // lwz 10,4(5) R10 is free again: LR was restored from it above
354 MOVWZ 8(OUT), R11 // lwz 11,8(5)
355 MOVWZ 12(OUT), R12 // lwz 12,12(5)
356 MOVW TEMP, 0(OUT) // stw 0, 0(5)
357 MOVW R6, 4(OUT) // stw 6, 4(5)
358 MOVW R7, 8(OUT) // stw 7, 8(5)
359 MOVW R8, 12(OUT) // stw 8, 12(5)
360 SUB $16, OUT, OUT // subi 5,5,16
361 MOVW R9, -16(INP) // stw 9, -16(3) negative offsets because INP was already advanced by 16
362 MOVW R10, -12(INP) // stw 10,-12(3)
363 MOVW R11, -8(INP) // stw 11,-8(3)
364 MOVW R12, -4(INP) // stw 12,-4(3)
365 BC 0x10, 0, dec_key // bdnz .Ldeckey
366
367 XOR R3, R3, R3 // xor 3,3,3 Clean R3 (return value 0 on success)
368
369 dec_key_abort:
370 MOVD R3, ret+24(FP) // Put return value into the FP
371 RET // blr
372
373 // func encryptBlockAsm(dst, src *byte, enc *uint32)
374 TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
375 // Load the arguments inside the registers; the routine runs one 16-byte vector from src through the round keys at enc and merges the result into memory at dst
376 MOVD dst+0(FP), BLK_OUT
377 MOVD src+8(FP), BLK_INP
378 MOVD enc+16(FP), BLK_KEY
379
380 MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5) round count is read from byte offset 240 of the key buffer
381 MOVD $15, BLK_IDX // li 7,15
382
383 LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 input vector at src
384 NEG BLK_OUT, R11 // neg 11,4
385 LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 input vector at src+15
386 LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
387 VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
388 LVSR (R11)(R0), KEY // lvsr 3,0,11 permute control derived from -dst, used for the store at the end
389 VXOR IN1, RCON, IN1 // vxor 2,2,4
390 MOVD $16, BLK_IDX // li 7,16
391 VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 combine the two input loads into one block
392 LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
393 LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 permute control applied to every round-key load below
394 SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
395 LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
396 ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
397 SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 loop count = rounds/2 - 1
398 VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
399
400 VXOR ZERO, IN0, ZERO // vxor 0,0,1 XOR the first round key into the block
401 LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
402 ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
403 MOVD BLK_ROUNDS, CTR // mtctr 6
404
405 loop_enc:
406 VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
407 VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
408 LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
409 ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
410 VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
411 VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1
412 LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
413 ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
414 BC 0x10, 0, loop_enc // bdnz .Loop_enc two vcipher steps per iteration
415
416 VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
417 VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
418 LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
419 VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
420 VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1 final step uses vcipherlast
421
422 VSPLTISB $-1, IN1 // vspltisb 2,-1
423 VXOR IN0, IN0, IN0 // vxor 1,1,1 IN0 = 0
424 MOVD $15, BLK_IDX // li 7,15
425 VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 build the select mask for the two destination vectors
426 VXOR KEY, RCON, KEY // vxor 3,3,4
427 LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 current contents at dst
428 VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
429 VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 merge the result with the vector at dst
430 LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 current contents at dst+15
431 STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
432 VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 merge the result with the vector at dst+15
433 STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
434
435 RET // blr
436
437 // func decryptBlockAsm(dst, src *byte, dec *uint32)
438 TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
439 // Load the arguments inside the registers; the routine runs one 16-byte vector from src through the round keys at dec using vncipher and merges the result into memory at dst
440 MOVD dst+0(FP), BLK_OUT
441 MOVD src+8(FP), BLK_INP
442 MOVD dec+16(FP), BLK_KEY
443
444 MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5) round count is read from byte offset 240 of the key buffer
445 MOVD $15, BLK_IDX // li 7,15
446
447 LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 input vector at src
448 NEG BLK_OUT, R11 // neg 11,4
449 LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 input vector at src+15
450 LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
451 VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
452 LVSR (R11)(R0), KEY // lvsr 3,0,11 permute control derived from -dst, used for the store at the end
453 VXOR IN1, RCON, IN1 // vxor 2,2,4
454 MOVD $16, BLK_IDX // li 7,16
455 VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 combine the two input loads into one block
456 LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
457 LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 permute control applied to every round-key load below
458 SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
459 LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
460 ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
461 SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 loop count = rounds/2 - 1
462 VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
463
464 VXOR ZERO, IN0, ZERO // vxor 0,0,1 XOR the first round key into the block
465 LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
466 ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
467 MOVD BLK_ROUNDS, CTR // mtctr 6
468
469 loop_dec:
470 VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
471 VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
472 LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
473 ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
474 VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
475 VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1
476 LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
477 ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
478 BC 0x10, 0, loop_dec // bdnz .Loop_dec two vncipher steps per iteration
479
480 VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
481 VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
482 LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
483 VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
484 VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1 final step uses vncipherlast
485
486 VSPLTISB $-1, IN1 // vspltisb 2,-1
487 VXOR IN0, IN0, IN0 // vxor 1,1,1 IN0 = 0
488 MOVD $15, BLK_IDX // li 7,15
489 VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 build the select mask for the two destination vectors
490 VXOR KEY, RCON, KEY // vxor 3,3,4
491 LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 current contents at dst
492 VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
493 VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 merge the result with the vector at dst
494 LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 current contents at dst+15
495 STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
496 VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 merge the result with the vector at dst+15
497 STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
498
499 RET // blr
500
501
View as plain text