Text file src/crypto/sha256/sha256block_ppc64le.s

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Based on CRYPTOGAMS code with the following comment:
     6  // # ====================================================================
     7  // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
     8  // # project. The module is, however, dual licensed under OpenSSL and
     9  // # CRYPTOGAMS licenses depending on where you obtain it. For further
    10  // # details see http://www.openssl.org/~appro/cryptogams/.
    11  // # ====================================================================
    12  
    13  #include "textflag.h"
    14  
    15  // SHA256 block routine. See sha256block.go for Go equivalent.
    16  //
    17  // The algorithm is detailed in FIPS 180-4:
    18  //
    19  //  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    20  //
    21  // Wt = Mt; for 0 <= t <= 15
    22  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    23  //
    24  // a = H0
    25  // b = H1
    26  // c = H2
    27  // d = H3
    28  // e = H4
    29  // f = H5
    30  // g = H6
    31  // h = H7
    32  //
    33  // for t = 0 to 63 {
    34  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    35  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    36  //    h = g
    37  //    g = f
    38  //    f = e
    39  //    e = d + T1
    40  //    d = c
    41  //    c = b
    42  //    b = a
    43  //    a = T1 + T2
    44  // }
    45  //
    46  // H0 = a + H0
    47  // H1 = b + H1
    48  // H2 = c + H2
    49  // H3 = d + H3
    50  // H4 = e + H4
    51  // H5 = f + H5
    52  // H6 = g + H6
    53  // H7 = h + H7
    54  
    55  #define CTX	R3	// dig: address of the hash state, read at entry and written back at exit
    56  #define INP	R4	// read cursor into p; advanced by 16 after each input load
    57  #define END	R5	// INP + LEN at entry: address just past the last whole block
    58  #define TBL	R6	// address of the ·kcon table
    59  #define IDX	R7	// byte offset into ·kcon of the next vector to load
    60  #define CNT	R8	// zeroed in ·block; not read by any instruction in this file
    61  #define LEN	R9	// len(p) rounded down to a multiple of 64
    62  #define OFFLOAD	R11	// base address of the stack area where V0-V7 are saved for each block
    63  #define TEMP	R12	// scratch; carries the L16_xx iteration count into CTR
    64  
    65  #define HEX00	R0	// used as the zero index register (assumes R0 == 0, the Go ppc64 convention; not established in this file)
    66  #define HEX10	R10	// index registers loaded with the constants 0x10 .. 0x70 in ·block
    67  #define HEX20	R25
    68  #define HEX30	R26
    69  #define HEX40	R27
    70  #define HEX50	R28
    71  #define HEX60	R29
    72  #define HEX70	R31
    73  
    74  // V0-V7 are A-H
    75  // V8-V23 are used for the message schedule
    76  #define KI	V24	// vector loaded from ·kcon: the round constant added ahead of the next round
    77  #define FUNC	V25	// scratch for the VSEL/VXOR results (the Ch and Maj terms)
    78  #define S0	V26	// VSHASIGMAW of a (BIGSIGMA0 term), then accumulates Maj to form T2
    79  #define S1	V27	// VSHASIGMAW of e (BIGSIGMA1 term)
    80  #define s0	V28	// VSHASIGMAW of schedule word xj_1 (message-schedule sigma term)
    81  #define s1	V29	// VSHASIGMAW of schedule word xj_14 (message-schedule sigma term)
    82  #define LEMASK	V31	// Permutation control register for little endian
    83  
    84  // 4 copies of each Kt, to fill all 4 words of a vector register
        //
        // Layout of ·kcon (1088 = 0x440 bytes, one 16-byte vector per entry):
        //   0x000-0x3FF  the 64 round constants K0..K63, each replicated 4 times
        //   0x400        an all-zero vector. Every round pre-adds the constant for
        //                the following round, so the 64th round needs a harmless
        //                "65th constant"; this entry is what it finds in KI.
        //   0x410-0x43F  three permutation control vectors. The 64th round's table
        //                load leaves the first one in KI; ·block loads the other
        //                two and uses all three to pack V0-V7 for the final store.
    85  DATA  ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
    86  DATA  ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
    87  DATA  ·kcon+0x010(SB)/8, $0x7137449171374491
    88  DATA  ·kcon+0x018(SB)/8, $0x7137449171374491
    89  DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
    90  DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
    91  DATA  ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
    92  DATA  ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
    93  DATA  ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
    94  DATA  ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
    95  DATA  ·kcon+0x050(SB)/8, $0x59f111f159f111f1
    96  DATA  ·kcon+0x058(SB)/8, $0x59f111f159f111f1
    97  DATA  ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
    98  DATA  ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
    99  DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
   100  DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
   101  DATA  ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
   102  DATA  ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
   103  DATA  ·kcon+0x090(SB)/8, $0x12835b0112835b01
   104  DATA  ·kcon+0x098(SB)/8, $0x12835b0112835b01
   105  DATA  ·kcon+0x0A0(SB)/8, $0x243185be243185be
   106  DATA  ·kcon+0x0A8(SB)/8, $0x243185be243185be
   107  DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
   108  DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
   109  DATA  ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
   110  DATA  ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
   111  DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
   112  DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
   113  DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
   114  DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
   115  DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
   116  DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
   117  DATA  ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
   118  DATA  ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
   119  DATA  ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
   120  DATA  ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
   121  DATA  ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
   122  DATA  ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
   123  DATA  ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
   124  DATA  ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
   125  DATA  ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
   126  DATA  ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
   127  DATA  ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
   128  DATA  ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
   129  DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
   130  DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
   131  DATA  ·kcon+0x170(SB)/8, $0x76f988da76f988da
   132  DATA  ·kcon+0x178(SB)/8, $0x76f988da76f988da
   133  DATA  ·kcon+0x180(SB)/8, $0x983e5152983e5152
   134  DATA  ·kcon+0x188(SB)/8, $0x983e5152983e5152
   135  DATA  ·kcon+0x190(SB)/8, $0xa831c66da831c66d
   136  DATA  ·kcon+0x198(SB)/8, $0xa831c66da831c66d
   137  DATA  ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
   138  DATA  ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
   139  DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
   140  DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
   141  DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
   142  DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
   143  DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
   144  DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
   145  DATA  ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
   146  DATA  ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
   147  DATA  ·kcon+0x1F0(SB)/8, $0x1429296714292967
   148  DATA  ·kcon+0x1F8(SB)/8, $0x1429296714292967
   149  DATA  ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
   150  DATA  ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
   151  DATA  ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
   152  DATA  ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
   153  DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
   154  DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
   155  DATA  ·kcon+0x230(SB)/8, $0x53380d1353380d13
   156  DATA  ·kcon+0x238(SB)/8, $0x53380d1353380d13
   157  DATA  ·kcon+0x240(SB)/8, $0x650a7354650a7354
   158  DATA  ·kcon+0x248(SB)/8, $0x650a7354650a7354
   159  DATA  ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
   160  DATA  ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
   161  DATA  ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
   162  DATA  ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
   163  DATA  ·kcon+0x270(SB)/8, $0x92722c8592722c85
   164  DATA  ·kcon+0x278(SB)/8, $0x92722c8592722c85
   165  DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
   166  DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
   167  DATA  ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
   168  DATA  ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
   169  DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
   170  DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
   171  DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
   172  DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
   173  DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
   174  DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
   175  DATA  ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
   176  DATA  ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
   177  DATA  ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
   178  DATA  ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
   179  DATA  ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
   180  DATA  ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
   181  DATA  ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
   182  DATA  ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
   183  DATA  ·kcon+0x310(SB)/8, $0x1e376c081e376c08
   184  DATA  ·kcon+0x318(SB)/8, $0x1e376c081e376c08
   185  DATA  ·kcon+0x320(SB)/8, $0x2748774c2748774c
   186  DATA  ·kcon+0x328(SB)/8, $0x2748774c2748774c
   187  DATA  ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
   188  DATA  ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
   189  DATA  ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
   190  DATA  ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
   191  DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
   192  DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
   193  DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
   194  DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
   195  DATA  ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
   196  DATA  ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
   197  DATA  ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
   198  DATA  ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
   199  DATA  ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
   200  DATA  ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
   201  DATA  ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
   202  DATA  ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
   203  DATA  ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
   204  DATA  ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
   205  DATA  ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
   206  DATA  ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
   207  DATA  ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
   208  DATA  ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
   209  DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
   210  DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
   211  DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
   212  DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
   213  DATA  ·kcon+0x400(SB)/8, $0x0000000000000000	// zero vector: the "constant" pre-added by the 64th round
   214  DATA  ·kcon+0x408(SB)/8, $0x0000000000000000
   215  DATA  ·kcon+0x410(SB)/8, $0x1011121310111213	// permutation control vectors
   216  DATA  ·kcon+0x418(SB)/8, $0x1011121300010203
   217  DATA  ·kcon+0x420(SB)/8, $0x1011121310111213
   218  DATA  ·kcon+0x428(SB)/8, $0x0405060700010203
   219  DATA  ·kcon+0x430(SB)/8, $0x1011121308090a0b
   220  DATA  ·kcon+0x438(SB)/8, $0x0405060700010203
   221  GLOBL ·kcon(SB), RODATA, $1088
   222  
        // SHA256ROUND0 performs one compression round for a schedule word that
        // is already available in xi (rounds that need no schedule update).
        //
        // a..h are the vector registers currently playing the roles of the
        // working variables; callers rotate the argument list by one position
        // per round instead of moving data between registers.
        //
        // On entry h must already include this round's Kt: it was added either
        // by the previous round's "VADDUWM KI, g, g" or, for the first round of
        // a block, by ·block itself. In order, the macro computes:
        //   FUNC = VSEL(g, f, e)          the Ch(e,f,g) term
        //   S1   = VSHASIGMAW of e        the BIGSIGMA1(e) term
        //   h   += xi + FUNC + S1         h is now T1
        //   FUNC = VSEL(b, c, a XOR b)    the Maj(a,b,c) term
        //   g   += KI                     pre-add the constant for the next round
        //   d   += h                      e' = d + T1
        //   S0   = VSHASIGMAW of a + FUNC T2
        //   KI   = next vector from (TBL)(IDX); IDX += 16
        //   h   += S0                     a' = T1 + T2
        //
        // Clobbers FUNC, S0, S1, KI and IDX.
   223  #define SHA256ROUND0(a, b, c, d, e, f, g, h, xi) \
   224  	VSEL		g, f, e, FUNC; \
   225  	VSHASIGMAW	$15, e, $1, S1; \
   226  	VADDUWM		xi, h, h; \
   227  	VSHASIGMAW	$0, a, $1, S0; \
   228  	VADDUWM		FUNC, h, h; \
   229  	VXOR		b, a, FUNC; \
   230  	VADDUWM		S1, h, h; \
   231  	VSEL		b, c, FUNC, FUNC; \
   232  	VADDUWM		KI, g, g; \
   233  	VADDUWM		h, d, d; \
   234  	VADDUWM		FUNC, S0, S0; \
   235  	LVX		(TBL)(IDX), KI; \
   236  	ADD		$16, IDX; \
   237  	VADDUWM		S0, h, h
   238  
        // SHA256ROUND1 performs one compression round exactly as SHA256ROUND0
        // does (same use of a..h, xi, KI, TBL and IDX) and, interleaved with it,
        // advances the message schedule by one word.
        //
        // xi is the schedule word consumed by this round. xj is the oldest word
        // of the 16-word window and is overwritten in place with the new word:
        //
        //   xj = xj + xj_9 + s0 + s1
        //
        // where s0 is VSHASIGMAW of xj_1 and s1 is VSHASIGMAW of xj_14. The
        // suffixes are distances from xj inside the window, so with xj = Wt-16
        // the operands are xj_1 = Wt-15, xj_9 = Wt-7 and xj_14 = Wt-2, i.e. the
        // four terms of the Wt recurrence.
        //
        // Clobbers FUNC, S0, S1, s0, s1, KI and IDX, and updates xj.
   239  #define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14) \
   240  	VSHASIGMAW	$0, xj_1, $0, s0; \
   241  	VSEL		g, f, e, FUNC; \
   242  	VSHASIGMAW	$15, e, $1, S1; \
   243  	VADDUWM		xi, h, h; \
   244  	VSHASIGMAW	$0, a, $1, S0; \
   245  	VSHASIGMAW	$15, xj_14, $0, s1; \
   246  	VADDUWM		FUNC, h, h; \
   247  	VXOR		b, a, FUNC; \
   248  	VADDUWM		xj_9, xj, xj; \
   249  	VADDUWM		S1, h, h; \
   250  	VSEL		b, c, FUNC, FUNC; \
   251  	VADDUWM		KI, g, g; \
   252  	VADDUWM		h, d, d; \
   253  	VADDUWM		FUNC, S0, S0; \
   254  	VADDUWM		s0, xj, xj; \
   255  	LVX		(TBL)(IDX), KI; \
   256  	ADD		$16, IDX; \
   257  	VADDUWM		S0, h, h; \
   258  	VADDUWM		s1, xj, xj
   259  
   260  // func block(dig *digest, p []byte)
        //
        // block folds every complete 64-byte block of p into the hash state at
        // dig. len(p) is rounded down to a multiple of 64; trailing bytes are
        // ignored, and the function returns without touching dig when no whole
        // block is present.
        //
        // Register use:
        //   V0-V7   working variables a..h (also the running hash between blocks)
        //   V8-V23  16-word message schedule window
        //   KI/IDX  current round-constant vector and byte offset into ·kcon
        //
        // Stack: the $128 bytes of locals hold the copies of V0-V7 taken at the
        // top of each block so they can be added back after the 64 rounds.
   261  TEXT ·block(SB),0,$128-32
   262  	MOVD	dig+0(FP), CTX
   263  	MOVD	p_base+8(FP), INP
   264  	MOVD	p_len+16(FP), LEN
   265  
        	// Round the length down to a whole number of 64-byte blocks.
   266  	SRD	$6, LEN
   267  	SLD	$6, LEN
   268  
   269  	ADD	INP, LEN, END
   270  
        	// Nothing to do when there is no complete block.
   271  	CMP	INP, END
   272  	BEQ	end
   273  
   274  	MOVD	$·kcon(SB), TBL
        	// On ppc64 the Go toolchain reserves a 32-byte fixed area at the bottom
        	// of every frame (0(R1) is the saved LR); the 128 bytes of locals
        	// declared above start at 32(R1). Base the offload area there: using
        	// R1 itself would let the STVX stores below overwrite the saved LR,
        	// which breaks any traceback taken while this function is running.
   275  	ADD	$32, R1, OFFLOAD
   276  
   277  	MOVD	R0, CNT
   278  	MOVWZ	$0x10, HEX10
   279  	MOVWZ	$0x20, HEX20
   280  	MOVWZ	$0x30, HEX30
   281  	MOVWZ	$0x40, HEX40
   282  	MOVWZ	$0x50, HEX50
   283  	MOVWZ	$0x60, HEX60
   284  	MOVWZ	$0x70, HEX70
   285  
        	// Build LEMASK, the permute control applied to every 16 bytes of
        	// input after it is loaded: an LVSL pattern XORed with 0x0F per byte.
   286  	MOVWZ	$8, IDX
   287  	LVSL	(IDX)(R0), LEMASK
   288  	VSPLTISB	$0x0F, KI
   289  	VXOR	KI, LEMASK, LEMASK
   290  
        	// Load the eight state words: four into V0, four into V4.
   291  	LXVW4X	(CTX)(HEX00), VS32	// v0 = vs32
   292  	LXVW4X	(CTX)(HEX10), VS36	// v4 = vs36
   293  
   294  	// unpack the input values into vector registers
   295  	VSLDOI	$4, V0, V0, V1
   296  	VSLDOI	$8, V0, V0, V2
   297  	VSLDOI	$12, V0, V0, V3
   298  	VSLDOI	$4, V4, V4, V5
   299  	VSLDOI	$8, V4, V4, V6
   300  	VSLDOI	$12, V4, V4, V7
   301  
        // One iteration per 64-byte block.
   302  loop:
        	// Restart the constant table: KI = K0, IDX points at K1.
   303  	LVX	(TBL)(HEX00), KI
   304  	MOVWZ	$16, IDX
   305  
   306  	LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
   307  	ADD	$16, INP
   308  
        	// Save the incoming a..h so they can be added back after the rounds.
   309  	STVX	V0, (OFFLOAD+HEX00)
   310  	STVX	V1, (OFFLOAD+HEX10)
   311  	STVX	V2, (OFFLOAD+HEX20)
   312  	STVX	V3, (OFFLOAD+HEX30)
   313  	STVX	V4, (OFFLOAD+HEX40)
   314  	STVX	V5, (OFFLOAD+HEX50)
   315  	STVX	V6, (OFFLOAD+HEX60)
   316  	STVX	V7, (OFFLOAD+HEX70)
   317  
        	// The round macros expect h to already contain Kt; prime it with K0
        	// and fetch K1 for the first round to pre-add.
   318  	VADDUWM	KI, V7, V7	// h+K[i]
   319  	LVX	(TBL)(IDX), KI
   320  	ADD	$16, IDX
   321  
        	// Rounds 0-15: each 16-byte input load is permuted with LEMASK and
        	// then split into four schedule registers with VSLDOI. The last of
        	// these rounds already uses SHA256ROUND1 to start extending the
        	// schedule (V8 is replaced by the 17th word).
   322  	VPERM	V8, V8, LEMASK, V8
   323  	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8)
   324  	VSLDOI	$4, V8, V8, V9
   325  	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9)
   326  	VSLDOI	$4, V9, V9, V10
   327  	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10)
   328  	LXVD2X	(INP)(R0), VS44	// load v12 (=vs44) in advance
   329  	ADD	$16, INP, INP
   330  	VSLDOI	$4, V10, V10, V11
   331  	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11)
   332  	VPERM	V12, V12, LEMASK, V12
   333  	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12)
   334  	VSLDOI	$4, V12, V12, V13
   335  	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13)
   336  	VSLDOI	$4, V13, V13, V14
   337  	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14)
   338  	LXVD2X	(INP)(R0), VS48	// load v16 (=vs48) in advance
   339  	ADD	$16, INP, INP
   340  	VSLDOI	$4, V14, V14, V15
   341  	SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15)
   342  	VPERM	V16, V16, LEMASK, V16
   343  	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16)
   344  	VSLDOI	$4, V16, V16, V17
   345  	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17)
   346  	VSLDOI	$4, V17, V17, V18
   347  	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18)
   348  	VSLDOI	$4, V18, V18, V19
   349  	LXVD2X	(INP)(R0), VS52	// load v20 (=vs52) in advance
   350  	ADD	$16, INP, INP
   351  	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19)
   352  	VPERM	V20, V20, LEMASK, V20
   353  	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20)
   354  	VSLDOI	$4, V20, V20, V21
   355  	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21)
   356  	VSLDOI	$4, V21, V21, V22
   357  	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22)
   358  	VSLDOI	$4, V22, V22, V23
   359  	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)
   360  
        	// Rounds 16-63: three passes over a 16-round body.
   361  	MOVWZ	$3, TEMP
   362  	MOVWZ	TEMP, CTR
   363  
   364  L16_xx:
   365  	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23)
   366  	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8)
   367  	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9)
   368  	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10)
   369  	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11)
   370  	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12)
   371  	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13)
   372  	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14)
   373  	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15)
   374  	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16)
   375  	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17)
   376  	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18)
   377  	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19)
   378  	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20)
   379  	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21)
   380  	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)
   381  
   382  	BC	0x10, 0, L16_xx		// bdnz
   383  
        	// Add the values saved at the top of the block back into a..h. The
        	// schedule registers V10-V17 are free again and serve as temporaries.
   384  	LVX	(OFFLOAD)(HEX00), V10
   385  
   386  	LVX	(OFFLOAD)(HEX10), V11
   387  	VADDUWM	V10, V0, V0
   388  	LVX	(OFFLOAD)(HEX20), V12
   389  	VADDUWM	V11, V1, V1
   390  	LVX	(OFFLOAD)(HEX30), V13
   391  	VADDUWM	V12, V2, V2
   392  	LVX	(OFFLOAD)(HEX40), V14
   393  	VADDUWM	V13, V3, V3
   394  	LVX	(OFFLOAD)(HEX50), V15
   395  	VADDUWM	V14, V4, V4
   396  	LVX	(OFFLOAD)(HEX60), V16
   397  	VADDUWM	V15, V5, V5
   398  	LVX	(OFFLOAD)(HEX70), V17
   399  	VADDUWM	V16, V6, V6
   400  	VADDUWM	V17, V7, V7
   401  
        	// More input? INP and END are addresses, hence the unsigned compare.
   402  	CMPU	INP, END
   403  	BLT	loop
   404  
        	// Pack the eight state registers back into two vectors. The final
        	// round's table load left the control vector at ·kcon+0x410 in KI and
        	// IDX at 0x420, so the remaining two control vectors come from
        	// (TBL)(IDX) and (TBL)(IDX+16).
   405  	LVX	(TBL)(IDX), V8
   406  	ADD	$16, IDX
   407  	VPERM	V0, V1, KI, V0
   408  	LVX	(TBL)(IDX), V9
   409  	VPERM	V4, V5, KI, V4
   410  	VPERM	V0, V2, V8, V0
   411  	VPERM	V4, V6, V8, V4
   412  	VPERM	V0, V3, V9, V0
   413  	VPERM	V4, V7, V9, V4
        	// Write the updated state back to dig.
   414  	STXVD2X	VS32, (CTX+HEX00)	// v0 = vs32
   415  	STXVD2X	VS36, (CTX+HEX10)	// v4 = vs36
   416  
   417  end:
   418  	RET
   419  
   420  

View as plain text