Text file
src/crypto/sha512/sha512block_ppc64le.s
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Based on CRYPTOGAMS code with the following comment:
6 // # ====================================================================
7 // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
8 // # project. The module is, however, dual licensed under OpenSSL and
9 // # CRYPTOGAMS licenses depending on where you obtain it. For further
10 // # details see http://www.openssl.org/~appro/cryptogams/.
11 // # ====================================================================
12
13 #include "textflag.h"
14
15 // SHA512 block routine. See sha512block.go for Go equivalent.
16 //
17 // The algorithm is detailed in FIPS 180-4:
18 //
19 // https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
20 //
21 // Wt = Mt; for 0 <= t <= 15
22 // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
23 //
24 // a = H0
25 // b = H1
26 // c = H2
27 // d = H3
28 // e = H4
29 // f = H5
30 // g = H6
31 // h = H7
32 //
33 // for t = 0 to 79 {
34 // T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
35 // T2 = BIGSIGMA0(a) + Maj(a,b,c)
36 // h = g
37 // g = f
38 // f = e
39 // e = d + T1
40 // d = c
41 // c = b
42 // b = a
43 // a = T1 + T2
44 // }
45 //
46 // H0 = a + H0
47 // H1 = b + H1
48 // H2 = c + H2
49 // H3 = d + H3
50 // H4 = e + H4
51 // H5 = f + H5
52 // H6 = g + H6
53 // H7 = h + H7
54
55 #define CTX R3
56 #define INP R4
57 #define END R5
58 #define TBL R6
59 #define IDX R7
60 #define CNT R8
61 #define LEN R9
62 #define OFFLOAD R11
63 #define TEMP R12
64
65 #define HEX00 R0
66 #define HEX10 R10
67 #define HEX20 R25
68 #define HEX30 R26
69 #define HEX40 R27
70 #define HEX50 R28
71 #define HEX60 R29
72 #define HEX70 R31
73
74 // V0-V7 are A-H
75 // V8-V23 are used for the message schedule
76 #define KI V24
77 #define FUNC V25
78 #define S0 V26
79 #define S1 V27
80 #define s0 V28
81 #define s1 V29
82 #define LEMASK V31 // Permutation control register for little endian
83
84 // 2 copies of each Kt, to fill both doublewords of a vector register
85 DATA ·kcon+0x000(SB)/8, $0x428a2f98d728ae22
86 DATA ·kcon+0x008(SB)/8, $0x428a2f98d728ae22
87 DATA ·kcon+0x010(SB)/8, $0x7137449123ef65cd
88 DATA ·kcon+0x018(SB)/8, $0x7137449123ef65cd
89 DATA ·kcon+0x020(SB)/8, $0xb5c0fbcfec4d3b2f
90 DATA ·kcon+0x028(SB)/8, $0xb5c0fbcfec4d3b2f
91 DATA ·kcon+0x030(SB)/8, $0xe9b5dba58189dbbc
92 DATA ·kcon+0x038(SB)/8, $0xe9b5dba58189dbbc
93 DATA ·kcon+0x040(SB)/8, $0x3956c25bf348b538
94 DATA ·kcon+0x048(SB)/8, $0x3956c25bf348b538
95 DATA ·kcon+0x050(SB)/8, $0x59f111f1b605d019
96 DATA ·kcon+0x058(SB)/8, $0x59f111f1b605d019
97 DATA ·kcon+0x060(SB)/8, $0x923f82a4af194f9b
98 DATA ·kcon+0x068(SB)/8, $0x923f82a4af194f9b
99 DATA ·kcon+0x070(SB)/8, $0xab1c5ed5da6d8118
100 DATA ·kcon+0x078(SB)/8, $0xab1c5ed5da6d8118
101 DATA ·kcon+0x080(SB)/8, $0xd807aa98a3030242
102 DATA ·kcon+0x088(SB)/8, $0xd807aa98a3030242
103 DATA ·kcon+0x090(SB)/8, $0x12835b0145706fbe
104 DATA ·kcon+0x098(SB)/8, $0x12835b0145706fbe
105 DATA ·kcon+0x0A0(SB)/8, $0x243185be4ee4b28c
106 DATA ·kcon+0x0A8(SB)/8, $0x243185be4ee4b28c
107 DATA ·kcon+0x0B0(SB)/8, $0x550c7dc3d5ffb4e2
108 DATA ·kcon+0x0B8(SB)/8, $0x550c7dc3d5ffb4e2
109 DATA ·kcon+0x0C0(SB)/8, $0x72be5d74f27b896f
110 DATA ·kcon+0x0C8(SB)/8, $0x72be5d74f27b896f
111 DATA ·kcon+0x0D0(SB)/8, $0x80deb1fe3b1696b1
112 DATA ·kcon+0x0D8(SB)/8, $0x80deb1fe3b1696b1
113 DATA ·kcon+0x0E0(SB)/8, $0x9bdc06a725c71235
114 DATA ·kcon+0x0E8(SB)/8, $0x9bdc06a725c71235
115 DATA ·kcon+0x0F0(SB)/8, $0xc19bf174cf692694
116 DATA ·kcon+0x0F8(SB)/8, $0xc19bf174cf692694
117 DATA ·kcon+0x100(SB)/8, $0xe49b69c19ef14ad2
118 DATA ·kcon+0x108(SB)/8, $0xe49b69c19ef14ad2
119 DATA ·kcon+0x110(SB)/8, $0xefbe4786384f25e3
120 DATA ·kcon+0x118(SB)/8, $0xefbe4786384f25e3
121 DATA ·kcon+0x120(SB)/8, $0x0fc19dc68b8cd5b5
122 DATA ·kcon+0x128(SB)/8, $0x0fc19dc68b8cd5b5
123 DATA ·kcon+0x130(SB)/8, $0x240ca1cc77ac9c65
124 DATA ·kcon+0x138(SB)/8, $0x240ca1cc77ac9c65
125 DATA ·kcon+0x140(SB)/8, $0x2de92c6f592b0275
126 DATA ·kcon+0x148(SB)/8, $0x2de92c6f592b0275
127 DATA ·kcon+0x150(SB)/8, $0x4a7484aa6ea6e483
128 DATA ·kcon+0x158(SB)/8, $0x4a7484aa6ea6e483
129 DATA ·kcon+0x160(SB)/8, $0x5cb0a9dcbd41fbd4
130 DATA ·kcon+0x168(SB)/8, $0x5cb0a9dcbd41fbd4
131 DATA ·kcon+0x170(SB)/8, $0x76f988da831153b5
132 DATA ·kcon+0x178(SB)/8, $0x76f988da831153b5
133 DATA ·kcon+0x180(SB)/8, $0x983e5152ee66dfab
134 DATA ·kcon+0x188(SB)/8, $0x983e5152ee66dfab
135 DATA ·kcon+0x190(SB)/8, $0xa831c66d2db43210
136 DATA ·kcon+0x198(SB)/8, $0xa831c66d2db43210
137 DATA ·kcon+0x1A0(SB)/8, $0xb00327c898fb213f
138 DATA ·kcon+0x1A8(SB)/8, $0xb00327c898fb213f
139 DATA ·kcon+0x1B0(SB)/8, $0xbf597fc7beef0ee4
140 DATA ·kcon+0x1B8(SB)/8, $0xbf597fc7beef0ee4
141 DATA ·kcon+0x1C0(SB)/8, $0xc6e00bf33da88fc2
142 DATA ·kcon+0x1C8(SB)/8, $0xc6e00bf33da88fc2
143 DATA ·kcon+0x1D0(SB)/8, $0xd5a79147930aa725
144 DATA ·kcon+0x1D8(SB)/8, $0xd5a79147930aa725
145 DATA ·kcon+0x1E0(SB)/8, $0x06ca6351e003826f
146 DATA ·kcon+0x1E8(SB)/8, $0x06ca6351e003826f
147 DATA ·kcon+0x1F0(SB)/8, $0x142929670a0e6e70
148 DATA ·kcon+0x1F8(SB)/8, $0x142929670a0e6e70
149 DATA ·kcon+0x200(SB)/8, $0x27b70a8546d22ffc
150 DATA ·kcon+0x208(SB)/8, $0x27b70a8546d22ffc
151 DATA ·kcon+0x210(SB)/8, $0x2e1b21385c26c926
152 DATA ·kcon+0x218(SB)/8, $0x2e1b21385c26c926
153 DATA ·kcon+0x220(SB)/8, $0x4d2c6dfc5ac42aed
154 DATA ·kcon+0x228(SB)/8, $0x4d2c6dfc5ac42aed
155 DATA ·kcon+0x230(SB)/8, $0x53380d139d95b3df
156 DATA ·kcon+0x238(SB)/8, $0x53380d139d95b3df
157 DATA ·kcon+0x240(SB)/8, $0x650a73548baf63de
158 DATA ·kcon+0x248(SB)/8, $0x650a73548baf63de
159 DATA ·kcon+0x250(SB)/8, $0x766a0abb3c77b2a8
160 DATA ·kcon+0x258(SB)/8, $0x766a0abb3c77b2a8
161 DATA ·kcon+0x260(SB)/8, $0x81c2c92e47edaee6
162 DATA ·kcon+0x268(SB)/8, $0x81c2c92e47edaee6
163 DATA ·kcon+0x270(SB)/8, $0x92722c851482353b
164 DATA ·kcon+0x278(SB)/8, $0x92722c851482353b
165 DATA ·kcon+0x280(SB)/8, $0xa2bfe8a14cf10364
166 DATA ·kcon+0x288(SB)/8, $0xa2bfe8a14cf10364
167 DATA ·kcon+0x290(SB)/8, $0xa81a664bbc423001
168 DATA ·kcon+0x298(SB)/8, $0xa81a664bbc423001
169 DATA ·kcon+0x2A0(SB)/8, $0xc24b8b70d0f89791
170 DATA ·kcon+0x2A8(SB)/8, $0xc24b8b70d0f89791
171 DATA ·kcon+0x2B0(SB)/8, $0xc76c51a30654be30
172 DATA ·kcon+0x2B8(SB)/8, $0xc76c51a30654be30
173 DATA ·kcon+0x2C0(SB)/8, $0xd192e819d6ef5218
174 DATA ·kcon+0x2C8(SB)/8, $0xd192e819d6ef5218
175 DATA ·kcon+0x2D0(SB)/8, $0xd69906245565a910
176 DATA ·kcon+0x2D8(SB)/8, $0xd69906245565a910
177 DATA ·kcon+0x2E0(SB)/8, $0xf40e35855771202a
178 DATA ·kcon+0x2E8(SB)/8, $0xf40e35855771202a
179 DATA ·kcon+0x2F0(SB)/8, $0x106aa07032bbd1b8
180 DATA ·kcon+0x2F8(SB)/8, $0x106aa07032bbd1b8
181 DATA ·kcon+0x300(SB)/8, $0x19a4c116b8d2d0c8
182 DATA ·kcon+0x308(SB)/8, $0x19a4c116b8d2d0c8
183 DATA ·kcon+0x310(SB)/8, $0x1e376c085141ab53
184 DATA ·kcon+0x318(SB)/8, $0x1e376c085141ab53
185 DATA ·kcon+0x320(SB)/8, $0x2748774cdf8eeb99
186 DATA ·kcon+0x328(SB)/8, $0x2748774cdf8eeb99
187 DATA ·kcon+0x330(SB)/8, $0x34b0bcb5e19b48a8
188 DATA ·kcon+0x338(SB)/8, $0x34b0bcb5e19b48a8
189 DATA ·kcon+0x340(SB)/8, $0x391c0cb3c5c95a63
190 DATA ·kcon+0x348(SB)/8, $0x391c0cb3c5c95a63
191 DATA ·kcon+0x350(SB)/8, $0x4ed8aa4ae3418acb
192 DATA ·kcon+0x358(SB)/8, $0x4ed8aa4ae3418acb
193 DATA ·kcon+0x360(SB)/8, $0x5b9cca4f7763e373
194 DATA ·kcon+0x368(SB)/8, $0x5b9cca4f7763e373
195 DATA ·kcon+0x370(SB)/8, $0x682e6ff3d6b2b8a3
196 DATA ·kcon+0x378(SB)/8, $0x682e6ff3d6b2b8a3
197 DATA ·kcon+0x380(SB)/8, $0x748f82ee5defb2fc
198 DATA ·kcon+0x388(SB)/8, $0x748f82ee5defb2fc
199 DATA ·kcon+0x390(SB)/8, $0x78a5636f43172f60
200 DATA ·kcon+0x398(SB)/8, $0x78a5636f43172f60
201 DATA ·kcon+0x3A0(SB)/8, $0x84c87814a1f0ab72
202 DATA ·kcon+0x3A8(SB)/8, $0x84c87814a1f0ab72
203 DATA ·kcon+0x3B0(SB)/8, $0x8cc702081a6439ec
204 DATA ·kcon+0x3B8(SB)/8, $0x8cc702081a6439ec
205 DATA ·kcon+0x3C0(SB)/8, $0x90befffa23631e28
206 DATA ·kcon+0x3C8(SB)/8, $0x90befffa23631e28
207 DATA ·kcon+0x3D0(SB)/8, $0xa4506cebde82bde9
208 DATA ·kcon+0x3D8(SB)/8, $0xa4506cebde82bde9
209 DATA ·kcon+0x3E0(SB)/8, $0xbef9a3f7b2c67915
210 DATA ·kcon+0x3E8(SB)/8, $0xbef9a3f7b2c67915
211 DATA ·kcon+0x3F0(SB)/8, $0xc67178f2e372532b
212 DATA ·kcon+0x3F8(SB)/8, $0xc67178f2e372532b
213 DATA ·kcon+0x400(SB)/8, $0xca273eceea26619c
214 DATA ·kcon+0x408(SB)/8, $0xca273eceea26619c
215 DATA ·kcon+0x410(SB)/8, $0xd186b8c721c0c207
216 DATA ·kcon+0x418(SB)/8, $0xd186b8c721c0c207
217 DATA ·kcon+0x420(SB)/8, $0xeada7dd6cde0eb1e
218 DATA ·kcon+0x428(SB)/8, $0xeada7dd6cde0eb1e
219 DATA ·kcon+0x430(SB)/8, $0xf57d4f7fee6ed178
220 DATA ·kcon+0x438(SB)/8, $0xf57d4f7fee6ed178
221 DATA ·kcon+0x440(SB)/8, $0x06f067aa72176fba
222 DATA ·kcon+0x448(SB)/8, $0x06f067aa72176fba
223 DATA ·kcon+0x450(SB)/8, $0x0a637dc5a2c898a6
224 DATA ·kcon+0x458(SB)/8, $0x0a637dc5a2c898a6
225 DATA ·kcon+0x460(SB)/8, $0x113f9804bef90dae
226 DATA ·kcon+0x468(SB)/8, $0x113f9804bef90dae
227 DATA ·kcon+0x470(SB)/8, $0x1b710b35131c471b
228 DATA ·kcon+0x478(SB)/8, $0x1b710b35131c471b
229 DATA ·kcon+0x480(SB)/8, $0x28db77f523047d84
230 DATA ·kcon+0x488(SB)/8, $0x28db77f523047d84
231 DATA ·kcon+0x490(SB)/8, $0x32caab7b40c72493
232 DATA ·kcon+0x498(SB)/8, $0x32caab7b40c72493
233 DATA ·kcon+0x4A0(SB)/8, $0x3c9ebe0a15c9bebc
234 DATA ·kcon+0x4A8(SB)/8, $0x3c9ebe0a15c9bebc
235 DATA ·kcon+0x4B0(SB)/8, $0x431d67c49c100d4c
236 DATA ·kcon+0x4B8(SB)/8, $0x431d67c49c100d4c
237 DATA ·kcon+0x4C0(SB)/8, $0x4cc5d4becb3e42b6
238 DATA ·kcon+0x4C8(SB)/8, $0x4cc5d4becb3e42b6
239 DATA ·kcon+0x4D0(SB)/8, $0x597f299cfc657e2a
240 DATA ·kcon+0x4D8(SB)/8, $0x597f299cfc657e2a
241 DATA ·kcon+0x4E0(SB)/8, $0x5fcb6fab3ad6faec
242 DATA ·kcon+0x4E8(SB)/8, $0x5fcb6fab3ad6faec
243 DATA ·kcon+0x4F0(SB)/8, $0x6c44198c4a475817
244 DATA ·kcon+0x4F8(SB)/8, $0x6c44198c4a475817
245 DATA ·kcon+0x500(SB)/8, $0x0000000000000000
246 DATA ·kcon+0x508(SB)/8, $0x0000000000000000
247 DATA ·kcon+0x510(SB)/8, $0x1011121314151617
248 DATA ·kcon+0x518(SB)/8, $0x0001020304050607
249 GLOBL ·kcon(SB), RODATA, $1312
250
251 #define SHA512ROUND0(a, b, c, d, e, f, g, h, xi) \
252 VSEL g, f, e, FUNC; \
253 VSHASIGMAD $15, e, $1, S1; \
254 VADDUDM xi, h, h; \
255 VSHASIGMAD $0, a, $1, S0; \
256 VADDUDM FUNC, h, h; \
257 VXOR b, a, FUNC; \
258 VADDUDM S1, h, h; \
259 VSEL b, c, FUNC, FUNC; \
260 VADDUDM KI, g, g; \
261 VADDUDM h, d, d; \
262 VADDUDM FUNC, S0, S0; \
263 LVX (TBL)(IDX), KI; \
264 ADD $16, IDX; \
265 VADDUDM S0, h, h
266
267 #define SHA512ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14) \
268 VSHASIGMAD $0, xj_1, $0, s0; \
269 VSEL g, f, e, FUNC; \
270 VSHASIGMAD $15, e, $1, S1; \
271 VADDUDM xi, h, h; \
272 VSHASIGMAD $0, a, $1, S0; \
273 VSHASIGMAD $15, xj_14, $0, s1; \
274 VADDUDM FUNC, h, h; \
275 VXOR b, a, FUNC; \
276 VADDUDM xj_9, xj, xj; \
277 VADDUDM S1, h, h; \
278 VSEL b, c, FUNC, FUNC; \
279 VADDUDM KI, g, g; \
280 VADDUDM h, d, d; \
281 VADDUDM FUNC, S0, S0; \
282 VADDUDM s0, xj, xj; \
283 LVX (TBL)(IDX), KI; \
284 ADD $16, IDX; \
285 VADDUDM S0, h, h; \
286 VADDUDM s1, xj, xj
287
288 // func block(dig *digest, p []byte)
289 TEXT ·block(SB),0,$128-32
290 MOVD dig+0(FP), CTX
291 MOVD p_base+8(FP), INP
292 MOVD p_len+16(FP), LEN
293
294 SRD $6, LEN
295 SLD $6, LEN
296
297 ADD INP, LEN, END
298
299 CMP INP, END
300 BEQ end
301
302 MOVD $·kcon(SB), TBL
303 MOVD R1, OFFLOAD
304
305 MOVD R0, CNT
306 MOVWZ $0x10, HEX10
307 MOVWZ $0x20, HEX20
308 MOVWZ $0x30, HEX30
309 MOVWZ $0x40, HEX40
310 MOVWZ $0x50, HEX50
311 MOVWZ $0x60, HEX60
312 MOVWZ $0x70, HEX70
313
314 MOVWZ $8, IDX
315 LVSL (IDX)(R0), LEMASK
316 VSPLTISB $0x0F, KI
317 VXOR KI, LEMASK, LEMASK
318
319 LXVD2X (CTX)(HEX00), VS32 // v0 = vs32
320 LXVD2X (CTX)(HEX10), VS34 // v2 = vs34
321 LXVD2X (CTX)(HEX20), VS36 // v4 = vs36
322 // unpack the input values into vector registers
323 VSLDOI $8, V0, V0, V1
324 LXVD2X (CTX)(HEX30), VS38 // v6 = vs38
325 VSLDOI $8, V2, V2, V3
326 VSLDOI $8, V4, V4, V5
327 VSLDOI $8, V6, V6, V7
328
329 loop:
330 LVX (TBL)(HEX00), KI
331 MOVWZ $16, IDX
332
333 LXVD2X (INP)(R0), VS40 // load v8 (=vs40) in advance
334 ADD $16, INP
335
336 STVX V0, (OFFLOAD+HEX00)
337 STVX V1, (OFFLOAD+HEX10)
338 STVX V2, (OFFLOAD+HEX20)
339 STVX V3, (OFFLOAD+HEX30)
340 STVX V4, (OFFLOAD+HEX40)
341 STVX V5, (OFFLOAD+HEX50)
342 STVX V6, (OFFLOAD+HEX60)
343 STVX V7, (OFFLOAD+HEX70)
344
345 VADDUDM KI, V7, V7 // h+K[i]
346 LVX (TBL)(IDX), KI
347 ADD $16, IDX
348
349 VPERM V8, V8, LEMASK, V8
350 SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8)
351 LXVD2X (INP)(R0), VS42 // load v10 (=vs42) in advance
352 ADD $16, INP, INP
353 VSLDOI $8, V8, V8, V9
354 SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9)
355 VPERM V10, V10, LEMASK, V10
356 SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10)
357 LXVD2X (INP)(R0), VS44 // load v12 (=vs44) in advance
358 ADD $16, INP, INP
359 VSLDOI $8, V10, V10, V11
360 SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11)
361 VPERM V12, V12, LEMASK, V12
362 SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12)
363 LXVD2X (INP)(R0), VS46 // load v14 (=vs46) in advance
364 ADD $16, INP, INP
365 VSLDOI $8, V12, V12, V13
366 SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13)
367 VPERM V14, V14, LEMASK, V14
368 SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14)
369 LXVD2X (INP)(R0), VS48 // load v16 (=vs48) in advance
370 ADD $16, INP, INP
371 VSLDOI $8, V14, V14, V15
372 SHA512ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15)
373 VPERM V16, V16, LEMASK, V16
374 SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16)
375 LXVD2X (INP)(R0), VS50 // load v18 (=vs50) in advance
376 ADD $16, INP, INP
377 VSLDOI $8, V16, V16, V17
378 SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17)
379 VPERM V18, V18, LEMASK, V18
380 SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18)
381 LXVD2X (INP)(R0), VS52 // load v20 (=vs52) in advance
382 ADD $16, INP, INP
383 VSLDOI $8, V18, V18, V19
384 SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19)
385 VPERM V20, V20, LEMASK, V20
386 SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20)
387 LXVD2X (INP)(R0), VS54 // load v22 (=vs54) in advance
388 ADD $16, INP, INP
389 VSLDOI $8, V20, V20, V21
390 SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21)
391 VPERM V22, V22, LEMASK, V22
392 SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22)
393 VSLDOI $8, V22, V22, V23
394 SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)
395
396 MOVWZ $4, TEMP
397 MOVWZ TEMP, CTR
398
399 L16_xx:
400 SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23)
401 SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8)
402 SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9)
403 SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10)
404 SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11)
405 SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12)
406 SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13)
407 SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14)
408 SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15)
409 SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16)
410 SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17)
411 SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18)
412 SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19)
413 SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20)
414 SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21)
415 SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)
416
417 BC 0x10, 0, L16_xx // bdnz
418
419 LVX (OFFLOAD)(HEX00), V10
420
421 LVX (OFFLOAD)(HEX10), V11
422 VADDUDM V10, V0, V0
423 LVX (OFFLOAD)(HEX20), V12
424 VADDUDM V11, V1, V1
425 LVX (OFFLOAD)(HEX30), V13
426 VADDUDM V12, V2, V2
427 LVX (OFFLOAD)(HEX40), V14
428 VADDUDM V13, V3, V3
429 LVX (OFFLOAD)(HEX50), V15
430 VADDUDM V14, V4, V4
431 LVX (OFFLOAD)(HEX60), V16
432 VADDUDM V15, V5, V5
433 LVX (OFFLOAD)(HEX70), V17
434 VADDUDM V16, V6, V6
435 VADDUDM V17, V7, V7
436
437 CMPU INP, END
438 BLT loop
439
440 VPERM V0, V1, KI, V0
441 VPERM V2, V3, KI, V2
442 VPERM V4, V5, KI, V4
443 VPERM V6, V7, KI, V6
444 STXVD2X VS32, (CTX+HEX00) // v0 = vs32
445 STXVD2X VS34, (CTX+HEX10) // v2 = vs34
446 STXVD2X VS36, (CTX+HEX20) // v4 = vs36
447 STXVD2X VS38, (CTX+HEX30) // v6 = vs38
448
449 end:
450 RET
451
452
View as plain text