Text file src/crypto/aes/asm_ppc64le.s

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Based on CRYPTOGAMS code with the following comment:
     6  // # ====================================================================
     7  // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
     8  // # project. The module is, however, dual licensed under OpenSSL and
     9  // # CRYPTOGAMS licenses depending on where you obtain it. For further
    10  // # details see http://www.openssl.org/~appro/cryptogams/.
    11  // # ====================================================================
    12  
    13  // Original code can be found at the link below:
    14  // https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl
    15  
    16  // Some function names were changed to be consistent with Go function
    17  // names. For instance, the functions aes_p8_set_{en,de}crypt_key became
    18  // set{En,De}cryptKeyAsm. setEncryptKeyAsm was also split in two parts
    19  // and a new section was created (doEncryptKeyAsm). This was necessary to
    20  // avoid overwriting the arguments when setDecryptKeyAsm reuses the setEncryptKeyAsm code.
    21  // There were other modifications as well, but the functionality was kept the same.
    22  
    23  #include "textflag.h"
    24  
    25  // For set{En,De}cryptKeyAsm
        // General-purpose registers:
        //   INP    - key pointer on entry; later reused as a store address and
        //            to carry the exit code
        //   BITS   - key length in bits (checked against 128, 192 and 256)
        //   OUT    - pointer into the key schedule being written
        //   PTR    - exit code while validating, then pointer into the rcon table
        //   CNT    - loop count moved into CTR
        //   ROUNDS - round count (10, 12 or 14) stored after the schedule
        //   TEMP   - scratch
        // Vector registers:
        //   ZERO   - cleared with VXOR in doEncryptKeyAsm
        //   IN0, IN1 - key material
        //   KEY, TMP, STAGE - working values of the key schedule
        //   RCON, MASK - constants loaded from the rcon table
        //   OUTPERM, OUTMASK, OUTHEAD, OUTTAIL - state used to store round keys
        //            through STVX when OUT is not vector aligned
    26  #define INP     R3
    27  #define BITS    R4
    28  #define OUT     R5
    29  #define PTR     R6
    30  #define CNT     R7
    31  #define ROUNDS  R8
    32  #define TEMP    R19
    33  #define ZERO    V0
    34  #define IN0     V1
    35  #define IN1     V2
    36  #define KEY     V3
    37  #define RCON    V4
    38  #define MASK    V5
    39  #define TMP     V6
    40  #define STAGE   V7
    41  #define OUTPERM V8
    42  #define OUTMASK V9
    43  #define OUTHEAD V10
    44  #define OUTTAIL V11
    45  
    46  // For {en,de}cryptBlockAsm
        //   BLK_INP    - source block pointer
        //   BLK_OUT    - destination block pointer
        //   BLK_KEY    - key schedule pointer
        //   BLK_ROUNDS - round count read from 240(BLK_KEY), then the loop count
        //   BLK_IDX    - index register for indexed vector loads and stores
        // The block functions also reuse the vector aliases above as plain
        // registers; in particular ZERO (V0) holds the cipher state there.
    47  #define BLK_INP    R3
    48  #define BLK_OUT    R4
    49  #define BLK_KEY    R5
    50  #define BLK_ROUNDS R6
    51  #define BLK_IDX    R7
    52  
    53  DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON: first 16 bytes, loaded into RCON before the key-schedule loops
    54  DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
    55  DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000 // reloaded into RCON by the 128-bit path after its 8-iteration loop
    56  DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
    57  DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK: loaded into MASK from offset 0x20
    58  DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
    59  DATA ·rcon+0x30(SB)/8, $0x0000000000000000 // not referenced by the code visible in this file
    60  DATA ·rcon+0x38(SB)/8, $0x0000000000000000
    61  GLOBL ·rcon(SB), RODATA, $64 // 64-byte read-only table
    62  
    63  // func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
        // setEncryptKeyAsm loads its three arguments into INP, BITS and OUT and
        // then jumps to doEncryptKeyAsm, which does the work and stores the
        // result in ret+24(FP).
    64  TEXT ·setEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
    65  	// Load the arguments inside the registers
    66  	MOVD	key+0(FP), INP
    67  	MOVD	keylen+8(FP), BITS
    68  	MOVD	enc+16(FP), OUT
        	// Jump (not call): doEncryptKeyAsm returns directly to our caller.
    69  	JMP	·doEncryptKeyAsm(SB)
    70  
    71  // This text is used by both setEncryptKeyAsm and setDecryptKeyAsm
        // Inputs (already in registers): INP = key pointer, BITS = key length in
        // bits, OUT = destination for the expanded key schedule.
        // Result, written to ret+24(FP) and left in R3: 0 on success, -1 if INP
        // or OUT is zero, -2 if BITS is not 128, 192 or 256.
        // On success ROUNDS (R8) holds 10, 12 or 14 and is also stored as a
        // 32-bit word 240 bytes past the start of the schedule; OUT is left
        // pointing at that word.
    72  TEXT ·doEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
    73  	// Do not change R10 since it's storing the LR value in setDecryptKeyAsm
    74  
    75  	// Check arguments
    76  	MOVD	$-1, PTR               // li    6,-1       exit code to -1 (255)
    77  	CMPU	INP, $0                // cmpldi r3,0      input key pointer set?
    78  	BC	0x0E, 2, enc_key_abort // beq-  .Lenc_key_abort
    79  	CMPU	OUT, $0                // cmpldi r5,0      output key pointer set?
    80  	BC	0x0E, 2, enc_key_abort // beq-  .Lenc_key_abort
    81  	MOVD	$-2, PTR               // li    6,-2       exit code to -2 (254)
    82  	CMPW	BITS, $128             // cmpwi 4,128      greater or equal to 128
    83  	BC	0x0E, 0, enc_key_abort // blt-  .Lenc_key_abort
    84  	CMPW	BITS, $256             // cmpwi 4,256      lesser or equal to 256
    85  	BC	0x0E, 1, enc_key_abort // bgt-  .Lenc_key_abort
    86  	ANDCC	$0x3f, BITS, TEMP      // andi. 0,4,0x3f   multiple of 64
    87  	BC	0x06, 2, enc_key_abort // bne-  .Lenc_key_abort
    88  
    89  	MOVD	$·rcon(SB), PTR // PTR point to rcon addr
    90  
    91  	// Get key from memory and write aligned into VR
    92  	NEG	INP, R9            // neg   9,3        R9 is ~INP + 1
    93  	LVX	(INP)(R0), IN0     // lvx   1,0,3      Load key inside IN0
    94  	ADD	$15, INP, INP      // addi  3,3,15     Add 15B to INP addr
    95  	LVSR	(R9)(R0), KEY      // lvsr  3,0,9
    96  	MOVD	$0x20, R8          // li    8,0x20     R8 = 32
    97  	CMPW	BITS, $192         // cmpwi 4,192      Key size == 192?
    98  	LVX	(INP)(R0), IN1     // lvx   2,0,3
    99  	VSPLTISB	$0x0f, MASK// vspltisb 5,0x0f  0x0f0f0f0f... mask
   100  	LVX	(PTR)(R0), RCON    // lvx   4,0,6      Load first 16 bytes into RCON
   101  	VXOR	KEY, MASK, KEY     // vxor  3,3,5      Adjust for byte swap
   102  	LVX	(PTR)(R8), MASK    // lvx   5,8,6      Load rcon+0x20 into MASK
   103  	ADD	$0x10, PTR, PTR    // addi  6,6,0x10   PTR to next 16 bytes of RCON
   104  	VPERM	IN0, IN1, KEY, IN0 // vperm 1,1,2,3    Align
   105  	MOVD	$8, CNT            // li    7,8        CNT = 8
   106  	VXOR	ZERO, ZERO, ZERO   // vxor  0,0,0      Zero to be zero :)
   107  	MOVD	CNT, CTR           // mtctr 7          Set the counter to 8 (rounds)
   108  
   109  	LVSL	(OUT)(R0), OUTPERM              // lvsl  8,0,5
   110  	VSPLTISB	$-1, OUTMASK                    // vspltisb      9,-1
   111  	LVX	(OUT)(R0), OUTHEAD              // lvx   10,0,5
   112  	VPERM	OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8
   113  
        	// These branches use the result of the CMPW BITS, $192 above:
        	// less -> 128-bit schedule, equal -> 192-bit, otherwise 256-bit.
   114  	BLT	loop128      // blt   .Loop128
   115  	ADD	$8, INP, INP // addi  3,3,8
   116  	BEQ	l192         // beq   .L192
   117  	ADD	$8, INP, INP // addi  3,3,8
   118  	JMP	l256         // b     .L256
   119  
   120  loop128:
   121  	// Key schedule (Round 1 to 8)
   122  	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
   123  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   124  	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8    Rotate
   125  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   126  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   127  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   128  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5        Write to output
   129  	ADD	$16, OUT, OUT                    // addi 5,5,16       Point to the next round
   130  
   131  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   132  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   133  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   134  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   135  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   136  	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
   137  	VXOR	IN0, KEY, IN0       // vxor 1,1,3
   138  	BC	0x10, 0, loop128    // bdnz .Loop128
   139  
   140  	LVX	(PTR)(R0), RCON // lvx 4,0,6     Last two round keys
   141  
   142  	// Key schedule (Round 9)
   143  	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
   144  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   145  	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8  Rotate
   146  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   147  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   148  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   149  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5   Round 9
   150  	ADD	$16, OUT, OUT                    // addi 5,5,16
   151  
   152  	// Key schedule (Round 10)
   153  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   154  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   155  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   156  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   157  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   158  	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
   159  	VXOR	IN0, KEY, IN0       // vxor 1,1,3
   160  
   161  	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
   162  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   163  	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8  Rotate
   164  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   165  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   166  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   167  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5    Round 10
   168  	ADD	$16, OUT, OUT                    // addi 5,5,16
   169  
   170  	// Key schedule (Round 11)
   171  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   172  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   173  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   174  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   175  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   176  	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   177  	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
   178  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   179  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   180  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5  Round 11
   181  
        	// OUT is 160 bytes past the start here; adding 0x50 makes it 240,
        	// where "done" stores the round count.
   182  	ADD	$15, OUT, INP   // addi  3,5,15
   183  	ADD	$0x50, OUT, OUT // addi  5,5,0x50
   184  
   185  	MOVD	$10, ROUNDS // li    8,10
   186  	JMP	done        // b     .Ldone
   187  
   188  l192:
   189  	LVX	(INP)(R0), TMP                   // lvx 6,0,3
   190  	MOVD	$4, CNT                          // li 7,4
   191  	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
   192  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   193  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   194  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
   195  	ADD	$16, OUT, OUT                    // addi 5,5,16
   196  	VPERM	IN1, TMP, KEY, IN1               // vperm 2,2,6,3
   197  	VSPLTISB	$8, KEY                  // vspltisb 3,8
   198  	MOVD	CNT, CTR                         // mtctr 7
   199  	VSUBUBM	MASK, KEY, MASK                  // vsububm 5,5,3
   200  
        	// Each of the 4 iterations stores three 16-byte vectors.
   201  loop192:
   202  	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
   203  	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
   204  	VCIPHERLAST	KEY, RCON, KEY      // vcipherlast 3,3,4
   205  
   206  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   207  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   208  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   209  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   210  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   211  
   212  	VSLDOI	$8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
   213  	VSPLTW	$3, IN0, TMP          // vspltw 6,1,3
   214  	VXOR	TMP, IN1, TMP         // vxor 6,6,2
   215  	VSLDOI	$12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
   216  	VADDUWM	RCON, RCON, RCON      // vadduwm 4,4,4
   217  	VXOR	IN1, TMP, IN1         // vxor 2,2,6
   218  	VXOR	IN0, KEY, IN0         // vxor 1,1,3
   219  	VXOR	IN1, KEY, IN1         // vxor 2,2,3
   220  	VSLDOI	$8, STAGE, IN0, STAGE // vsldoi 7,7,1,8
   221  
   222  	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
   223  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   224  	VPERM	STAGE, STAGE, OUTPERM, OUTTAIL   // vperm 11,7,7,8
   225  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   226  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   227  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   228  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
   229  	ADD	$16, OUT, OUT                    // addi 5,5,16
   230  
   231  	VSLDOI	$8, IN0, IN1, STAGE              // vsldoi 7,1,2,8
   232  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   233  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   234  	VPERM	STAGE, STAGE, OUTPERM, OUTTAIL   // vperm 11,7,7,8
   235  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   236  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   237  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   238  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   239  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   240  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
   241  	ADD	$16, OUT, OUT                    // addi 5,5,16
   242  
   243  	VSPLTW	$3, IN0, TMP                     // vspltw 6,1,3
   244  	VXOR	TMP, IN1, TMP                    // vxor 6,6,2
   245  	VSLDOI	$12, ZERO, IN1, IN1              // vsldoi 2,0,2,12
   246  	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
   247  	VXOR	IN1, TMP, IN1                    // vxor 2,2,6
   248  	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   249  	VXOR	IN1, KEY, IN1                    // vxor 2,2,3
   250  	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
   251  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   252  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   253  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
   254  	ADD	$15, OUT, INP                    // addi 3,5,15
   255  	ADD	$16, OUT, OUT                    // addi 5,5,16
   256  	BC	0x10, 0, loop192                 // bdnz .Loop192
   257  
        	// OUT is 208 bytes past the start here; adding 0x20 makes it 240.
   258  	MOVD	$12, ROUNDS     // li 8,12
   259  	ADD	$0x20, OUT, OUT // addi 5,5,0x20
   260  	BR	done            // b .Ldone
   261  
   262  l256:
   263  	LVX	(INP)(R0), TMP                   // lvx 6,0,3
   264  	MOVD	$7, CNT                          // li 7,7
   265  	MOVD	$14, ROUNDS                      // li 8,14
   266  	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
   267  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   268  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   269  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
   270  	ADD	$16, OUT, OUT                    // addi 5,5,16
   271  	VPERM	IN1, TMP, KEY, IN1               // vperm 2,2,6,3
   272  	MOVD	CNT, CTR                         // mtctr 7
   273  
        	// Each pass stores two 16-byte vectors; the loop is left through the
        	// bdz in its middle once CTR reaches zero, with OUT 240 bytes past the
        	// start.
   274  loop256:
   275  	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
   276  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   277  	VPERM	IN1, IN1, OUTPERM, OUTTAIL       // vperm 11,2,2,8
   278  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   279  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   280  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   281  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
   282  	ADD	$16, OUT, OUT                    // addi 5,5,16
   283  
   284  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   285  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   286  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   287  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   288  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   289  	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
   290  	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   291  	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
   292  	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   293  	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
   294  	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
   295  	ADD	$15, OUT, INP                    // addi 3,5,15
   296  	ADD	$16, OUT, OUT                    // addi 5,5,16
   297  	BC	0x12, 0, done                    // bdz .Ldone
   298  
   299  	VSPLTW	$3, IN0, KEY        // vspltw 3,1,3
   300  	VSLDOI	$12, ZERO, IN1, TMP // vsldoi 6,0,2,12
   301  	VSBOX	KEY, KEY            // vsbox 3,3
   302  
   303  	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   304  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   305  	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   306  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   307  	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   308  
   309  	VXOR	IN1, KEY, IN1 // vxor 2,2,3
   310  	JMP	loop256       // b .Loop256
   311  
        	// Merge the pending OUTHEAD bytes into the vector at INP (set to
        	// OUT+15 at the last round-key store) and write it back, then record
        	// the round count at 0(OUT) and set a zero exit code.
   312  done:
   313  	LVX	(INP)(R0), IN1             // lvx   2,0,3
   314  	VSEL	OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9
   315  	STVX	IN1, (INP+R0)              // stvx  2,0,3
   316  	MOVD	$0, PTR                    // li    6,0    set PTR to 0 (exit code 0)
   317  	MOVW	ROUNDS, 0(OUT)             // stw   8,0(5)
   318  
        	// Common exit: PTR holds the exit code (0, -1 or -2).
   319  enc_key_abort:
   320  	MOVD	PTR, INP        // mr    3,6    set exit code with PTR value
   321  	MOVD	INP, ret+24(FP) // Put return value into the FP
   322  	RET                  // blr
   323  
   324  // func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
        // setDecryptKeyAsm builds the encryption key schedule at dec with
        // doEncryptKeyAsm and then reverses the order of its 16-byte round keys
        // in place. A non-zero exit code from doEncryptKeyAsm is returned
        // unchanged; otherwise 0 is returned.
   325  TEXT ·setDecryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
   326  	// Load the arguments inside the registers
   327  	MOVD	key+0(FP), INP
   328  	MOVD	keylen+8(FP), BITS
   329  	MOVD	dec+16(FP), OUT
   330  
        	// CALL overwrites LR, so keep our own return address in R10 across it.
   331  	MOVD	LR, R10              // mflr 10
   332  	CALL	·doEncryptKeyAsm(SB)
   333  	MOVD	R10, LR              // mtlr 10
   334  
   335  	CMPW	INP, $0                // cmpwi 3,0  exit 0 = ok
   336  	BC	0x06, 2, dec_key_abort // bne- .Ldec_key_abort
   337  
   338  	// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
        	// and left OUT 240 bytes past the start of the schedule. CNT = ROUNDS*16
        	// is the byte offset of the last round key, so after these instructions
        	// INP points at the first round key, OUT at the last one, and CTR holds
        	// the ROUNDS/2 swaps to perform.
   339  	SLW	$4, ROUNDS, CNT    // slwi 7,8,4
   340  	SUB	$240, OUT, INP     // subi 3,5,240
   341  	SRW	$1, ROUNDS, ROUNDS // srwi 8,8,1
   342  	ADD	R7, INP, OUT       // add 5,3,7
   343  	MOVD	ROUNDS, CTR        // mtctr 8
   344  
   345  	// dec_key will invert the key sequence in order to be used for decrypt
        	// Each iteration swaps the four words at INP with the four words at
        	// OUT, then moves INP forward and OUT backward by 16 bytes. R10 is
        	// free to use as scratch because LR was already restored above.
   346  dec_key:
   347  	MOVWZ	0(INP), TEMP     // lwz 0, 0(3)    (TEMP stands in for r0)
   348  	MOVWZ	4(INP), R6       // lwz 6, 4(3)
   349  	MOVWZ	8(INP), R7       // lwz 7, 8(3)
   350  	MOVWZ	12(INP), R8      // lwz 8, 12(3)
   351  	ADD	$16, INP, INP    // addi 3,3,16
   352  	MOVWZ	0(OUT), R9       // lwz 9, 0(5)
   353  	MOVWZ	4(OUT), R10      // lwz 10,4(5)
   354  	MOVWZ	8(OUT), R11      // lwz 11,8(5)
   355  	MOVWZ	12(OUT), R12     // lwz 12,12(5)
   356  	MOVW	TEMP, 0(OUT)     // stw 0, 0(5)
   357  	MOVW	R6, 4(OUT)       // stw 6, 4(5)
   358  	MOVW	R7, 8(OUT)       // stw 7, 8(5)
   359  	MOVW	R8, 12(OUT)      // stw 8, 12(5)
   360  	SUB	$16, OUT, OUT    // subi 5,5,16
   361  	MOVW	R9, -16(INP)     // stw 9, -16(3)
   362  	MOVW	R10, -12(INP)    // stw 10,-12(3)
   363  	MOVW	R11, -8(INP)     // stw 11,-8(3)
   364  	MOVW	R12, -4(INP)     // stw 12,-4(3)
   365  	BC	0x10, 0, dec_key // bdnz .Ldeckey
   366  
   367  	XOR	R3, R3, R3 // xor 3,3,3      Clean R3
   368  
        	// R3 is 0 on the success path, or the exit code of doEncryptKeyAsm.
   369  dec_key_abort:
   370  	MOVD	R3, ret+24(FP) // Put return value into the FP
   371  	RET                 // blr
   372  
   373  // func encryptBlockAsm(dst, src *byte, enc *uint32)
        // encryptBlockAsm encrypts the 16-byte block at src with the key schedule
        // at enc and writes the result to dst. The round count is read from the
        // 32-bit word at enc+240. src, dst and enc are accessed with LVX/STVX
        // plus permutes, presumably so that they need not be 16-byte aligned.
        // Note that ZERO (V0) holds the cipher state in this function.
   374  TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
   375  	// Load the arguments inside the registers
   376  	MOVD	dst+0(FP), BLK_OUT
   377  	MOVD	src+8(FP), BLK_INP
   378  	MOVD	enc+16(FP), BLK_KEY
   379  
   380  	MOVWZ	240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
   381  	MOVD	$15, BLK_IDX             // li 7,15
   382  
        	// Load the input block from the vectors at src and src+15, set up
        	// the permute controls, and XOR the state with the first round key.
        	// CTR is set to rounds/2 - 1.
   383  	LVX	(BLK_INP)(R0), ZERO        // lvx 0,0,3
   384  	NEG	BLK_OUT, R11               // neg 11,4
   385  	LVX	(BLK_INP)(BLK_IDX), IN0    // lvx 1,7,3
   386  	LVSL	(BLK_INP)(R0), IN1         // lvsl 2,0,3
   387  	VSPLTISB	$0x0f, RCON        // vspltisb 4,0x0f
   388  	LVSR	(R11)(R0), KEY             // lvsr 3,0,11
   389  	VXOR	IN1, RCON, IN1             // vxor 2,2,4
   390  	MOVD	$16, BLK_IDX               // li 7,16
   391  	VPERM	ZERO, IN0, IN1, ZERO       // vperm 0,0,1,2
   392  	LVX	(BLK_KEY)(R0), IN0         // lvx 1,0,5
   393  	LVSR	(BLK_KEY)(R0), MASK        // lvsr 5,0,5
   394  	SRW	$1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
   395  	LVX	(BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   396  	ADD	$16, BLK_IDX, BLK_IDX      // addi 7,7,16
   397  	SUB	$1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
   398  	VPERM	IN1, IN0, MASK, IN0        // vperm 1,2,1,5
   399  
   400  	VXOR	ZERO, IN0, ZERO         // vxor 0,0,1
   401  	LVX	(BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
   402  	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
   403  	MOVD	BLK_ROUNDS, CTR         // mtctr 6
   404  
        	// Two VCIPHER rounds per iteration, alternating IN0 and IN1 as the
        	// round-key register while the next key vector is being loaded.
   405  loop_enc:
   406  	VPERM	IN0, IN1, MASK, IN1     // vperm 2,1,2,5
   407  	VCIPHER	ZERO, IN1, ZERO         // vcipher 0,0,2
   408  	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
   409  	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
   410  	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
   411  	VCIPHER	ZERO, IN0, ZERO         // vcipher 0,0,1
   412  	LVX	(BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
   413  	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
   414  	BC	0x10, 0, loop_enc       // bdnz .Loop_enc
   415  
        	// Last two rounds: one VCIPHER followed by VCIPHERLAST.
   416  	VPERM	IN0, IN1, MASK, IN1     // vperm 2,1,2,5
   417  	VCIPHER	ZERO, IN1, ZERO         // vcipher 0,0,2
   418  	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
   419  	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
   420  	VCIPHERLAST	ZERO, IN0, ZERO // vcipherlast 0,0,1
   421  
        	// Store the result: the vectors at dst and dst+15 are loaded, merged
        	// with the permuted state through VSEL (mask in IN1) and written back.
   422  	VSPLTISB	$-1, IN1         // vspltisb 2,-1
   423  	VXOR	IN0, IN0, IN0            // vxor 1,1,1
   424  	MOVD	$15, BLK_IDX             // li 7,15
   425  	VPERM	IN1, IN0, KEY, IN1       // vperm 2,2,1,3
   426  	VXOR	KEY, RCON, KEY           // vxor 3,3,4
   427  	LVX	(BLK_OUT)(R0), IN0       // lvx 1,0,4
   428  	VPERM	ZERO, ZERO, KEY, ZERO    // vperm 0,0,0,3
   429  	VSEL	IN0, ZERO, IN1, IN0      // vsel 1,1,0,2
   430  	LVX	(BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
   431  	STVX	IN0, (BLK_OUT+R0)        // stvx 1,0,4
   432  	VSEL	ZERO, RCON, IN1, ZERO    // vsel 0,0,4,2
   433  	STVX	ZERO, (BLK_OUT+BLK_IDX)  // stvx 0,7,4
   434  
   435  	RET // blr
   436  
   437  // func decryptBlockAsm(dst, src *byte, dec *uint32)
        // decryptBlockAsm decrypts the 16-byte block at src with the key schedule
        // at dec and writes the result to dst. The round count is read from the
        // 32-bit word at dec+240. The code has the same structure as
        // encryptBlockAsm, with VNCIPHER/VNCIPHERLAST in place of
        // VCIPHER/VCIPHERLAST. Note that ZERO (V0) holds the cipher state here.
   438  TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
   439  	// Load the arguments inside the registers
   440  	MOVD	dst+0(FP), BLK_OUT
   441  	MOVD	src+8(FP), BLK_INP
   442  	MOVD	dec+16(FP), BLK_KEY
   443  
   444  	MOVWZ	240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
   445  	MOVD	$15, BLK_IDX             // li 7,15
   446  
        	// Load the input block from the vectors at src and src+15, set up
        	// the permute controls, and XOR the state with the first round key.
        	// CTR is set to rounds/2 - 1.
   447  	LVX	(BLK_INP)(R0), ZERO        // lvx 0,0,3
   448  	NEG	BLK_OUT, R11               // neg 11,4
   449  	LVX	(BLK_INP)(BLK_IDX), IN0    // lvx 1,7,3
   450  	LVSL	(BLK_INP)(R0), IN1         // lvsl 2,0,3
   451  	VSPLTISB	$0x0f, RCON        // vspltisb 4,0x0f
   452  	LVSR	(R11)(R0), KEY             // lvsr 3,0,11
   453  	VXOR	IN1, RCON, IN1             // vxor 2,2,4
   454  	MOVD	$16, BLK_IDX               // li 7,16
   455  	VPERM	ZERO, IN0, IN1, ZERO       // vperm 0,0,1,2
   456  	LVX	(BLK_KEY)(R0), IN0         // lvx 1,0,5
   457  	LVSR	(BLK_KEY)(R0), MASK        // lvsr 5,0,5
   458  	SRW	$1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
   459  	LVX	(BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   460  	ADD	$16, BLK_IDX, BLK_IDX      // addi 7,7,16
   461  	SUB	$1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
   462  	VPERM	IN1, IN0, MASK, IN0        // vperm 1,2,1,5
   463  
   464  	VXOR	ZERO, IN0, ZERO         // vxor 0,0,1
   465  	LVX	(BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
   466  	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
   467  	MOVD	BLK_ROUNDS, CTR         // mtctr 6
   468  
        	// Two VNCIPHER rounds per iteration, alternating IN0 and IN1 as the
        	// round-key register while the next key vector is being loaded.
   469  loop_dec:
   470  	VPERM	IN0, IN1, MASK, IN1     // vperm 2,1,2,5
   471  	VNCIPHER	ZERO, IN1, ZERO // vncipher 0,0,2
   472  	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
   473  	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
   474  	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
   475  	VNCIPHER	ZERO, IN0, ZERO // vncipher 0,0,1
   476  	LVX	(BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
   477  	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
   478  	BC	0x10, 0, loop_dec       // bdnz .Loop_dec
   479  
        	// Last two rounds: one VNCIPHER followed by VNCIPHERLAST.
   480  	VPERM	IN0, IN1, MASK, IN1     // vperm 2,1,2,5
   481  	VNCIPHER	ZERO, IN1, ZERO // vncipher 0,0,2
   482  	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
   483  	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
   484  	VNCIPHERLAST	ZERO, IN0, ZERO // vncipherlast 0,0,1
   485  
        	// Store the result: the vectors at dst and dst+15 are loaded, merged
        	// with the permuted state through VSEL (mask in IN1) and written back.
   486  	VSPLTISB	$-1, IN1         // vspltisb 2,-1
   487  	VXOR	IN0, IN0, IN0            // vxor 1,1,1
   488  	MOVD	$15, BLK_IDX             // li 7,15
   489  	VPERM	IN1, IN0, KEY, IN1       // vperm 2,2,1,3
   490  	VXOR	KEY, RCON, KEY           // vxor 3,3,4
   491  	LVX	(BLK_OUT)(R0), IN0       // lvx 1,0,4
   492  	VPERM	ZERO, ZERO, KEY, ZERO    // vperm 0,0,0,3
   493  	VSEL	IN0, ZERO, IN1, IN0      // vsel 1,1,0,2
   494  	LVX	(BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
   495  	STVX	IN0, (BLK_OUT+R0)        // stvx 1,0,4
   496  	VSEL	ZERO, RCON, IN1, ZERO    // vsel 0,0,4,2
   497  	STVX	ZERO, (BLK_OUT+BLK_IDX)  // stvx 0,7,4
   498  
   499  	RET // blr
   500  
   501  

View as plain text