Text file src/vendor/golang.org/x/crypto/chacha20/chacha_s390x.s

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build gc && !purego
     6  // +build gc,!purego
     7  
     8  #include "go_asm.h"
     9  #include "textflag.h"
    10  
    11  // This is an implementation of the ChaCha20 encryption algorithm as
    12  // specified in RFC 7539. It uses vector instructions to compute
    13  // 4 keystream blocks in parallel (256 bytes) which are then XORed
    14  // with the bytes in the input slice.
    15  
    16  GLOBL ·constants<>(SB), RODATA|NOPTR, $32 // 32-byte read-only table: BSWAP at +0x00, J0 at +0x10
    17  // BSWAP: VPERM byte selector that swaps the bytes in each 4-byte element
    18  DATA ·constants<>+0x00(SB)/4, $0x03020100
    19  DATA ·constants<>+0x04(SB)/4, $0x07060504
    20  DATA ·constants<>+0x08(SB)/4, $0x0b0a0908
    21  DATA ·constants<>+0x0c(SB)/4, $0x0f0e0d0c
    22  // J0: [j0, j1, j2, j3], the first four state words (ASCII "expand 32-byte k" read as little-endian 32-bit words)
    23  DATA ·constants<>+0x10(SB)/4, $0x61707865 // "expa"
    24  DATA ·constants<>+0x14(SB)/4, $0x3320646e // "nd 3"
    25  DATA ·constants<>+0x18(SB)/4, $0x79622d32 // "2-by"
    26  DATA ·constants<>+0x1c(SB)/4, $0x6b206574 // "te k"
    27  
    28  #define BSWAP V5 // byte-swap permute mask, loaded from constants+0x00
    29  #define J0    V6 // constant words j0-j3, loaded from constants+0x10
    30  #define KEY0  V7 // first 16 bytes loaded from key
    31  #define KEY1  V8 // second 16 bytes loaded from key
    32  #define NONCE V9 // nonce words in elements 1-3 after setup in xorKeyStreamVX
    33  #define CTR   V10 // one block counter per 32-bit element
    34  #define M0    V11 // M0-M3: scratch registers; hold 64 bytes of src inside XORV
    35  #define M1    V12
    36  #define M2    V13
    37  #define M3    V14
    38  #define INC   V15 // counter increment ({0,1,2,3} during setup, then {4,4,4,4})
    39  #define X0    V16 // X0-X15: working state; Xi holds state word i, one 32-bit element per block
    40  #define X1    V17
    41  #define X2    V18
    42  #define X3    V19
    43  #define X4    V20
    44  #define X5    V21
    45  #define X6    V22
    46  #define X7    V23
    47  #define X8    V24
    48  #define X9    V25
    49  #define X10   V26
    50  #define X11   V27
    51  #define X12   V28
    52  #define X13   V29
    53  #define X14   V30
    54  #define X15   V31
    55  
    56  #define NUM_ROUNDS 20 // total rounds; the round loop runs NUM_ROUNDS/2 times with two ROUND4 invocations each
    57  
    58  #define ROUND4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \ // four independent quarter rounds (groups a, b, c, d), interleaved one step at a time
    59  	VAF    a1, a0, a0  \ // x0 += x1 (for x = a, b, c, d; 32-bit element-wise)
    60  	VAF    b1, b0, b0  \
    61  	VAF    c1, c0, c0  \
    62  	VAF    d1, d0, d0  \
    63  	VX     a0, a2, a2  \ // x2 ^= x0
    64  	VX     b0, b2, b2  \
    65  	VX     c0, c2, c2  \
    66  	VX     d0, d2, d2  \
    67  	VERLLF $16, a2, a2 \ // x2 = rotate-left(x2, 16)
    68  	VERLLF $16, b2, b2 \
    69  	VERLLF $16, c2, c2 \
    70  	VERLLF $16, d2, d2 \
    71  	VAF    a2, a3, a3  \ // x3 += x2
    72  	VAF    b2, b3, b3  \
    73  	VAF    c2, c3, c3  \
    74  	VAF    d2, d3, d3  \
    75  	VX     a3, a1, a1  \ // x1 ^= x3
    76  	VX     b3, b1, b1  \
    77  	VX     c3, c1, c1  \
    78  	VX     d3, d1, d1  \
    79  	VERLLF $12, a1, a1 \ // x1 = rotate-left(x1, 12)
    80  	VERLLF $12, b1, b1 \
    81  	VERLLF $12, c1, c1 \
    82  	VERLLF $12, d1, d1 \
    83  	VAF    a1, a0, a0  \ // x0 += x1
    84  	VAF    b1, b0, b0  \
    85  	VAF    c1, c0, c0  \
    86  	VAF    d1, d0, d0  \
    87  	VX     a0, a2, a2  \ // x2 ^= x0
    88  	VX     b0, b2, b2  \
    89  	VX     c0, c2, c2  \
    90  	VX     d0, d2, d2  \
    91  	VERLLF $8, a2, a2  \ // x2 = rotate-left(x2, 8)
    92  	VERLLF $8, b2, b2  \
    93  	VERLLF $8, c2, c2  \
    94  	VERLLF $8, d2, d2  \
    95  	VAF    a2, a3, a3  \ // x3 += x2
    96  	VAF    b2, b3, b3  \
    97  	VAF    c2, c3, c3  \
    98  	VAF    d2, d3, d3  \
    99  	VX     a3, a1, a1  \ // x1 ^= x3
   100  	VX     b3, b1, b1  \
   101  	VX     c3, c1, c1  \
   102  	VX     d3, d1, d1  \
   103  	VERLLF $7, a1, a1  \ // x1 = rotate-left(x1, 7)
   104  	VERLLF $7, b1, b1  \
   105  	VERLLF $7, c1, c1  \
   106  	VERLLF $7, d1, d1
   107  
   108  #define PERMUTE(mask, v0, v1, v2, v3) \ // rearrange the bytes of v0-v3 in place, using mask as the VPERM byte selector
   109  	VPERM v0, v0, mask, v0 \ // both VPERM inputs are the same register, so bytes are only reordered within it
   110  	VPERM v1, v1, mask, v1 \
   111  	VPERM v2, v2, mask, v2 \
   112  	VPERM v3, v3, mask, v3
   113  
   114  #define ADDV(x, v0, v1, v2, v3) \ // add x to each of v0-v3 in place (32-bit element-wise)
   115  	VAF x, v0, v0 \
   116  	VAF x, v1, v1 \
   117  	VAF x, v2, v2 \
   118  	VAF x, v3, v3
   119  
   120  #define XORV(off, dst, src, v0, v1, v2, v3) \ // store 64 bytes at off(dst) = 64 bytes at off(src) XOR v0-v3; clobbers M0-M3 and byte-swaps v0-v3 in place
   121  	VLM  off(src), M0, M3          \ // load 64 source bytes into M0-M3
   122  	PERMUTE(BSWAP, v0, v1, v2, v3) \ // swap the bytes of every 32-bit word (presumably so the keystream is emitted in little-endian byte order — confirm)
   123  	VX   v0, M0, M0                \ // Mi ^= vi
   124  	VX   v1, M1, M1                \
   125  	VX   v2, M2, M2                \
   126  	VX   v3, M3, M3                \
   127  	VSTM M0, M3, off(dst) // store the 64 result bytes
   128  
   129  #define SHUFFLE(a, b, c, d, t, u, v, w) \ // transpose the 4x4 matrix of 32-bit elements held in a, b, c, d (in place); t, u, v, w are overwritten as scratch
   130  	VMRHF a, c, t \ // t = {a[0], c[0], a[1], c[1]}
   131  	VMRHF b, d, u \ // u = {b[0], d[0], b[1], d[1]}
   132  	VMRLF a, c, v \ // v = {a[2], c[2], a[3], c[3]}
   133  	VMRLF b, d, w \ // w = {b[2], d[2], b[3], d[3]}
   134  	VMRHF t, u, a \ // a = {a[0], b[0], c[0], d[0]}
   135  	VMRLF t, u, b \ // b = {a[1], b[1], c[1], d[1]}
   136  	VMRHF v, w, c \ // c = {a[2], b[2], c[2], d[2]}
   137  	VMRLF v, w, d // d = {a[3], b[3], c[3], d[3]}
   138  
   139  // func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
   140  TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0 // XORs src with the keystream into dst, 256 bytes (4 blocks) per pass, and writes the advanced counter back to *counter
   141  	MOVD $·constants<>(SB), R1 // R1=&constants (BSWAP mask followed by J0)
   142  	MOVD dst+0(FP), R2         // R2=&dst[0]
   143  	LMG  src+24(FP), R3, R4    // R3=&src[0] R4=len(src)
   144  	MOVD key+48(FP), R5        // R5=key
   145  	MOVD nonce+56(FP), R6      // R6=nonce
   146  	MOVD counter+64(FP), R7    // R7=counter
   147  
   148  	// load BSWAP and J0
   149  	VLM (R1), BSWAP, J0
   150  
   151  	// setup: load the key and place the nonce words in elements 1-3 of NONCE
   152  	MOVD  $95, R0 // length operand for VLL below. NOTE(review): 95 is larger than the highest byte index of a vector (15); presumably VLL then loads all 16 bytes, 4 more than the 12-byte nonce — confirm
   153  	VLM   (R5), KEY0, KEY1 // 32 bytes of key
   154  	VLL   R0, (R6), NONCE // nonce bytes, starting at byte 0 of NONCE
   155  	VZERO M0
   156  	VLEIB $7, $32, M0 // byte 7 of M0 = 32; presumably read by VSRLB as a shift of 32 bits — confirm against the VSRLB definition
   157  	VSRLB M0, NONCE, NONCE // shift right by whole bytes so the nonce words land in elements 1-3, the ones read below
   158  
   159  	// initialize counter values: CTR gets consecutive counters, INC becomes the per-pass step
   160  	VLREPF (R7), CTR // replicate *counter into all four elements
   161  	VZERO  INC
   162  	VLEIF  $1, $1, INC
   163  	VLEIF  $2, $2, INC
   164  	VLEIF  $3, $3, INC // INC = {0, 1, 2, 3}
   165  	VAF    INC, CTR, CTR // CTR = {c, c+1, c+2, c+3}
   166  	VREPIF $4, INC // INC = {4, 4, 4, 4}: four blocks are consumed per pass
   167  
   168  chacha: // one pass of this outer loop produces 4 blocks (256 bytes)
   169  	VREPF $0, J0, X0 // X0-X3: constant words j0-j3, each replicated into all elements
   170  	VREPF $1, J0, X1
   171  	VREPF $2, J0, X2
   172  	VREPF $3, J0, X3
   173  	VREPF $0, KEY0, X4 // X4-X11: the eight key words, each replicated into all elements
   174  	VREPF $1, KEY0, X5
   175  	VREPF $2, KEY0, X6
   176  	VREPF $3, KEY0, X7
   177  	VREPF $0, KEY1, X8
   178  	VREPF $1, KEY1, X9
   179  	VREPF $2, KEY1, X10
   180  	VREPF $3, KEY1, X11
   181  	VLR   CTR, X12 // X12: block counter, a different value in each element
   182  	VREPF $1, NONCE, X13 // X13-X15: nonce words from NONCE elements 1-3 (element 0 is not used here)
   183  	VREPF $2, NONCE, X14
   184  	VREPF $3, NONCE, X15
   185  
   186  	MOVD $(NUM_ROUNDS/2), R1 // R1 = round-loop iteration count (R1 no longer needs to hold &constants)
   187  
   188  loop:
   189  	ROUND4(X0, X4, X12,  X8, X1, X5, X13,  X9, X2, X6, X14, X10, X3, X7, X15, X11) // quarter rounds on the columns of the 4x4 state
   190  	ROUND4(X0, X5, X15, X10, X1, X6, X12, X11, X2, X7, X13, X8,  X3, X4, X14, X9) // quarter rounds on the diagonals of the 4x4 state
   191  
   192  	ADD $-1, R1
   193  	BNE loop // repeat until R1 reaches zero
   194  
   195  	// decrement length (256 bytes are processed per pass)
   196  	ADD $-256, R4
   197  
   198  	// rearrange vectors: transpose each group of four so a register holds four consecutive words of one block, then add the input state
   199  	SHUFFLE(X0, X1, X2, X3, M0, M1, M2, M3)
   200  	ADDV(J0, X0, X1, X2, X3)
   201  	SHUFFLE(X4, X5, X6, X7, M0, M1, M2, M3)
   202  	ADDV(KEY0, X4, X5, X6, X7)
   203  	SHUFFLE(X8, X9, X10, X11, M0, M1, M2, M3)
   204  	ADDV(KEY1, X8, X9, X10, X11)
   205  	VAF CTR, X12, X12 // add each block's own counter while X12 still has one block per element
   206  	SHUFFLE(X12, X13, X14, X15, M0, M1, M2, M3)
   207  	ADDV(NONCE, X12, X13, X14, X15) // adds the nonce words (elements 1-3); the counter word was added by the VAF above
   208  
   209  	// increment counters
   210  	VAF INC, CTR, CTR // advance every element by 4 for the next pass
   211  
   212  	// xor keystream with plaintext
   213  	XORV(0*64, R2, R3, X0, X4,  X8, X12) // first 64-byte block
   214  	XORV(1*64, R2, R3, X1, X5,  X9, X13) // second block
   215  	XORV(2*64, R2, R3, X2, X6, X10, X14) // third block
   216  	XORV(3*64, R2, R3, X3, X7, X11, X15) // fourth block
   217  
   218  	// increment pointers
   219  	MOVD $256(R2), R2
   220  	MOVD $256(R3), R3
   221  
   222  	CMPBNE  R4, $0, chacha // loop until R4 is exactly 0. NOTE(review): this only terminates if len(src) is a non-zero multiple of 256 — presumably guaranteed by the caller; verify
   223  
   224  	VSTEF $0, CTR, (R7) // store element 0 of CTR (the first counter value not yet used) back to *counter
   225  	RET
   226  

View as plain text