Source file src/cmd/compile/internal/ssa/gen/PPC64Ops.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  // +build ignore
     7  
     8  package main
     9  
    10  import "strings"
    11  
    12  // Notes:
    13  //  - Less-than-64-bit integer types live in the low portion of registers.
    14  //    For now, the upper portion is junk; sign/zero-extension might be optimized in the future, but not yet.
    15  //  - Boolean types are zero or 1; stored in a byte, but loaded with AMOVBZ so the upper bytes of a register are zero.
    16  //  - *const instructions may use a constant larger than the instruction can encode.
    17  //    In this case the assembler expands to multiple instructions and uses tmp
    18  //    register (R31).
    19  
// regNamesPPC64 lists the PPC64 register names in numeric order.
// ORDER IS SIGNIFICANT: init below maps each name to its slice index
// (num[name] = i) and buildReg sets bit <index> in a regMask, so the
// position of a name here is the register's bit in every regMask.
// init panics if this slice ever exceeds 64 entries (one bit per
// register in a 64-bit regMask).
    20  var regNamesPPC64 = []string{
    21  	"R0", // REGZERO, not used, but simplifies counting in regalloc
    22  	"SP", // REGSP
    23  	"SB", // REGSB
    24  	"R3",
    25  	"R4",
    26  	"R5",
    27  	"R6",
    28  	"R7",
    29  	"R8",
    30  	"R9",
    31  	"R10",
    32  	"R11", // REGCTXT for closures
    33  	"R12",
    34  	"R13", // REGTLS
    35  	"R14",
    36  	"R15",
    37  	"R16",
    38  	"R17",
    39  	"R18",
    40  	"R19",
    41  	"R20",
    42  	"R21",
    43  	"R22",
    44  	"R23",
    45  	"R24",
    46  	"R25",
    47  	"R26",
    48  	"R27",
    49  	"R28",
    50  	"R29",
    51  	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
    52  	"R31", // REGTMP
    53  
	// Floating-point registers. Note that init's allocatable fp mask
	// below covers only F1-F26; F0 and F27-F31 are named here (so they
	// have bits in regMask) but are not in that mask.
    54  	"F0",
    55  	"F1",
    56  	"F2",
    57  	"F3",
    58  	"F4",
    59  	"F5",
    60  	"F6",
    61  	"F7",
    62  	"F8",
    63  	"F9",
    64  	"F10",
    65  	"F11",
    66  	"F12",
    67  	"F13",
    68  	"F14",
    69  	"F15",
    70  	"F16",
    71  	"F17",
    72  	"F18",
    73  	"F19",
    74  	"F20",
    75  	"F21",
    76  	"F22",
    77  	"F23",
    78  	"F24",
    79  	"F25",
    80  	"F26",
    81  	"F27",
    82  	"F28",
    83  	"F29",
    84  	"F30",
    85  	"F31",
    86  
    87  	// If you add registers, update asyncPreempt in runtime.
    88  
	// Condition/special registers are currently not modeled by the
	// register allocator; the names are kept here (commented out) to
	// document where their bits would go if they ever were.
    89  	// "CR0",
    90  	// "CR1",
    91  	// "CR2",
    92  	// "CR3",
    93  	// "CR4",
    94  	// "CR5",
    95  	// "CR6",
    96  	// "CR7",
    97  
    98  	// "CR",
    99  	// "XER",
   100  	// "LR",
   101  	// "CTR",
   102  }
   103  
   104  func init() {
   105  	// Make map from reg names to reg integers.
   106  	if len(regNamesPPC64) > 64 {
   107  		panic("too many registers")
   108  	}
   109  	num := map[string]int{}
   110  	for i, name := range regNamesPPC64 {
   111  		num[name] = i
   112  	}
   113  	buildReg := func(s string) regMask {
   114  		m := regMask(0)
   115  		for _, r := range strings.Split(s, " ") {
   116  			if n, ok := num[r]; ok {
   117  				m |= regMask(1) << uint(n)
   118  				continue
   119  			}
   120  			panic("register " + r + " not found")
   121  		}
   122  		return m
   123  	}
   124  
   125  	var (
   126  		gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   127  		fp = buildReg("F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26")
   128  		sp = buildReg("SP")
   129  		sb = buildReg("SB")
   130  		gr = buildReg("g")
   131  		// cr  = buildReg("CR")
   132  		// ctr = buildReg("CTR")
   133  		// lr  = buildReg("LR")
   134  		tmp     = buildReg("R31")
   135  		ctxt    = buildReg("R11")
   136  		callptr = buildReg("R12")
   137  		// tls = buildReg("R13")
   138  		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   139  		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   140  		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   141  		gp21a0      = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
   142  		gp31        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   143  		gp22        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
   144  		gp32        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
   145  		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   146  		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   147  		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   148  		crgp11      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
   149  		crgp21      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
   150  		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   151  		gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   152  		prefreg     = regInfo{inputs: []regMask{gp | sp | sb}}
   153  		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   154  		gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
   155  		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   156  		gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   157  		gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   158  		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   159  		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   160  		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   161  		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   162  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   163  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   164  		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   165  		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   166  		fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
   167  		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   168  		fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
   169  		callerSave  = regMask(gp | fp | gr)
   170  		r3          = buildReg("R3")
   171  		r4          = buildReg("R4")
   172  		r5          = buildReg("R5")
   173  		r6          = buildReg("R6")
   174  	)
   175  	ops := []opData{
   176  		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
   177  		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},     // arg0 + auxInt
   178  		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},   // arg0+arg1
   179  		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
   180  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0-arg1
   181  		{name: "SUBFCconst", argLength: 1, reg: gp11, asm: "SUBC", aux: "Int64"},  // auxInt - arg0 (with carry)
   182  		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                      // arg0-arg1
   183  		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0-arg1
   184  
   185  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   186  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   187  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   188  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   189  		{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
   190  
   191  		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
   192  		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
   193  		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
   194  		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
   195  		{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true},   // arg0 * arg1, returns (hi, lo)
   196  
   197  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   198  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   199  
   200  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   201  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   202  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   203  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   204  
   205  		{name: "SRAD", argLength: 2, reg: gp21, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!)
   206  		{name: "SRAW", argLength: 2, reg: gp21, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width
   207  		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},   // unsigned arg0 >> (arg1&127), 64 bit width
   208  		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},   // unsigned arg0 >> (arg1&63), 32 bit width
   209  		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},   // arg0 << (arg1&127), 64 bit width
   210  		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},   // arg0 << (arg1&63), 32 bit width
   211  
   212  		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
   213  		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
   214  		// The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA.
   215  		// The constant shift values are packed into the aux int32.
   216  		{name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int32"},     // arg0 extract bits identified by shift params"
   217  		{name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, //
   218  		{name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, //
   219  
   220  		{name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true}, // arg0 + arg1 + carry, returns (sum, carry)
   221  
   222  		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   223  		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   224  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},   // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   225  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},   // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   226  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},   // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width
   227  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},   // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width
   228  
   229  		{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   230  		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   231  		{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
   232  
   233  		{name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"},                      // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
   234  		{name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"},                       // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
   235  		{name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above
   236  
   237  		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
   238  		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
   239  
   240  		{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
   241  		{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
   242  
   243  		{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   244  		{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   245  		{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
   246  
   247  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   248  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   249  
   250  		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   251  		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   252  		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   253  		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   254  
   255  		{name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit)
   256  		{name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"},  // arg0 % arg1 (signed 64-bit)
   257  		{name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit)
   258  		{name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"},  // arg0 % arg1 (signed 32-bit)
   259  		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   260  
   261  		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   262  		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   263  		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   264  		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   265  		{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
   266  		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   267  
   268  		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   269  		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
   270  // data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianness issues).
   271  		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   272  		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   273  
   274  		{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
   275  		{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
   276  
   277  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                    // arg0&arg1
   278  		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                     // arg0&^arg1
   279  		{name: "ANDCC", argLength: 2, reg: gp2cr, asm: "ANDCC", commutative: true, typ: "Flags"}, // arg0&arg1 sets CC
   280  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                      // arg0|arg1
   281  		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                       // arg0|^arg1
   282  		{name: "ORCC", argLength: 2, reg: gp2cr, asm: "ORCC", commutative: true, typ: "Flags"},   // arg0|arg1 sets CC
   283  		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                    // ^(arg0|arg1)
   284  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},      // arg0^arg1
   285  		{name: "XORCC", argLength: 2, reg: gp2cr, asm: "XORCC", commutative: true, typ: "Flags"}, // arg0^arg1 sets CC
   286  		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},      // arg0^^arg1
   287  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                       // -arg0 (integer)
   288  		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                     // -arg0 (floating point)
   289  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                   // sqrt(arg0) (floating point)
   290  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                 // sqrt(arg0) (floating point, single precision)
   291  		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                   // floor(arg0), float64
   292  		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                    // ceil(arg0), float64
   293  		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                   // trunc(arg0), float64
   294  		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                   // round(arg0), float64
   295  		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                     // abs(arg0), float64
   296  		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                   // -abs(arg0), float64
   297  		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                 // copysign arg0 -> arg1, float64
   298  
   299  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                     // arg0|aux
   300  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                   // arg0^aux
   301  		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true}, // arg0&aux // and-immediate sets CC on PPC, always.
   302  		{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}}, asm: "ANDCC", aux: "Int64", typ: "Flags"},                             // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   303  
   304  		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
   305  		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
   306  		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
   307  		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
   308  		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
   309  		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
   310  
   311  		// Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
   312  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
   313  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
   314  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
   315  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
   316  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
   317  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
   318  
   319  		// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
   320  		// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
   321  		// In these cases the index register field is set to 0 and the full address is in the base register.
   322  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes reverse order
   323  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend reverse order
   324  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend reverse order
   325  
   326  		// In these cases an index register is used in addition to a base register
   327  		// Loads from memory location arg[0] + arg[1].
   328  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", typ: "UInt8"},  // zero extend uint8 to uint64
   329  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", typ: "Int16"},    // sign extend int16 to int64
   330  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", typ: "UInt16"}, // zero extend uint16 to uint64
   331  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", typ: "Int32"},    // sign extend int32 to int64
   332  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", typ: "UInt32"}, // zero extend uint32 to uint64
   333  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", typ: "Int64"},
   334  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", typ: "Int16"}, // sign extend int16 to int64
   335  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", typ: "Int32"}, // sign extend int32 to int64
   336  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", typ: "Int64"},
   337  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"},
   338  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"},
   339  
   340  		// Prefetch instruction
   341  		// Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option.
   342  		{name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true},
   343  
   344  		// Store bytes in the reverse endian order of the arch into arg0.
   345  		// These are indexed stores with no offset field in the instruction so the auxint fields are not used.
   346  		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes reverse order
   347  		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes reverse order
   348  		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes reverse order
   349  
   350  		// Floating point loads from arg0+aux+auxint
   351  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
   352  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
   353  
   354  		// Store bytes in the endian order of the arch into arg0+aux+auxint
   355  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
   356  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
   357  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
   358  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
   359  
   360  		// Store floating point value into arg0+aux+auxint
   361  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double flot
   362  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
   363  
   364  		// Stores using index and base registers
   365  		// Stores to arg[0] + arg[1]
   366  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", typ: "Mem"},     // store bye
   367  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", typ: "Mem"},     // store half word
   368  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", typ: "Mem"},     // store word
   369  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", typ: "Mem"},     // store double word
   370  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"},   // store double float
   371  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"},   // store single float
   372  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", typ: "Mem"}, // store half word reversed byte using index reg
   373  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", typ: "Mem"}, // store word reversed byte using index reg
   374  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", typ: "Mem"}, // store double word reversed byte using index reg
   375  
   376  		// The following ops store 0 into arg0+aux+auxint arg1=mem
   377  		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
   378  		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
   379  		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
   380  		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
   381  
   382  		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
   383  
   384  		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   385  		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   386  		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   387  		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   388  
   389  		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   390  		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   391  		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   392  		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   393  		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   394  		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   395  		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   396  		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   397  
   398  		// ISEL auxInt values 0=LT 1=GT 2=EQ   arg2 ? arg0 : arg1
   399  		// ISEL auxInt values 4=GE 5=LE 6=NE   !arg2 ? arg1 : arg0
   400  		// ISELB special case where arg0, arg1 values are 0, 1 for boolean result
   401  		{name: "ISEL", argLength: 3, reg: crgp21, asm: "ISEL", aux: "Int32", typ: "Int32"},  // see above
   402  		{name: "ISELB", argLength: 2, reg: crgp11, asm: "ISEL", aux: "Int32", typ: "Int32"}, // see above
   403  
   404  		// pseudo-ops
   405  		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   406  		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   407  		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   408  		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   409  		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   410  		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   411  		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   412  		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   413  		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
   414  		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
   415  
   416  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   417  		// and sorts it to the very beginning of the block to prevent other
   418  		// use of the closure pointer.
   419  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
   420  
   421  		// LoweredGetCallerSP returns the SP of the caller of the current function.
   422  		{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
   423  
   424  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   425  		// I.e., if f calls g "calls" getcallerpc,
   426  		// the result should be the PC within f that g will return to.
   427  		// See runtime/stubs.go for a more detailed discussion.
   428  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   429  
   430  		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
   431  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   432  		// Round ops to block fused-multiply-add extraction.
   433  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   434  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   435  
   436  		{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                       // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   437  		{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                         // tail call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   438  		{name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   439  		{name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},            // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   440  
   441  		// large or unaligned zeroing
   442  		// arg0 = address of memory to zero (in R3, changed as side effect)
   443  		// returns mem
   444  		//
   445  		// a loop is generated when there is more than one iteration
   446  		// needed to clear 4 doublewords
   447  		//
   448  		//	XXLXOR	VS32,VS32,VS32
   449  		// 	MOVD	$len/32,R31
   450  		//	MOVD	R31,CTR
   451  		//	MOVD	$16,R31
   452  		//	loop:
   453  		//	STXVD2X VS32,(R0)(R3)
   454  		//	STXVD2X	VS32,(R31)(R3)
   455  		//	ADD	R3,32
   456  		//	BC	loop
   457  
   458  		// remaining doubleword clears generated as needed
   459  		//	MOVD	R0,(R3)
   460  		//	MOVD	R0,8(R3)
   461  		//	MOVD	R0,16(R3)
   462  		//	MOVD	R0,24(R3)
   463  
   464  		// one or more of these to clear remainder < 8 bytes
   465  		//	MOVW	R0,n1(R3)
   466  		//	MOVH	R0,n2(R3)
   467  		//	MOVB	R0,n3(R3)
   468  		{
   469  			name:      "LoweredZero",
   470  			aux:       "Int64",
   471  			argLength: 2,
   472  			reg: regInfo{
   473  				inputs:   []regMask{buildReg("R20")},
   474  				clobbers: buildReg("R20"),
   475  			},
   476  			clobberFlags:   true,
   477  			typ:            "Mem",
   478  			faultOnNilArg0: true,
   479  			unsafePoint:    true,
   480  		},
   481  		{
   482  			name:      "LoweredZeroShort",
   483  			aux:       "Int64",
   484  			argLength: 2,
   485  			reg: regInfo{
   486  				inputs: []regMask{gp}},
   487  			typ:            "Mem",
   488  			faultOnNilArg0: true,
   489  			unsafePoint:    true,
   490  		},
   491  		{
   492  			name:      "LoweredQuadZeroShort",
   493  			aux:       "Int64",
   494  			argLength: 2,
   495  			reg: regInfo{
   496  				inputs: []regMask{gp},
   497  			},
   498  			typ:            "Mem",
   499  			faultOnNilArg0: true,
   500  			unsafePoint:    true,
   501  		},
   502  		{
   503  			name:      "LoweredQuadZero",
   504  			aux:       "Int64",
   505  			argLength: 2,
   506  			reg: regInfo{
   507  				inputs:   []regMask{buildReg("R20")},
   508  				clobbers: buildReg("R20"),
   509  			},
   510  			clobberFlags:   true,
   511  			typ:            "Mem",
   512  			faultOnNilArg0: true,
   513  			unsafePoint:    true,
   514  		},
   515  
   516  		// R31 is temp register
   517  		// Loop code:
   518  		//	MOVD len/32,R31		set up loop ctr
   519  		//	MOVD R31,CTR
   520  		//	MOVD $16,R31		index register
   521  		// loop:
   522  		//	LXVD2X (R0)(R4),VS32
   523  		//	LXVD2X (R31)(R4),VS33
   524  		//	ADD  R4,$32          increment src
   525  		//	STXVD2X VS32,(R0)(R3)
   526  		//	STXVD2X VS33,(R31)(R3)
   527  		//	ADD  R3,$32          increment dst
   528  		//	BC 16,0,loop         branch ctr
   529  		// For this purpose, VS32 and VS33 are treated as
   530  		// scratch registers. Since regalloc does not
   531  		// track vector registers, even if it could be marked
   532  		// as clobbered it would have no effect.
   533  		// TODO: If vector registers are managed by regalloc
   534  		// mark these as clobbered.
   535  		//
   536  		// Bytes not moved by this loop are moved
   537  		// with a combination of the following instructions,
   538  		// starting with the largest sizes and generating as
   539  		// many as needed, using the appropriate offset value.
   540  		//	MOVD  n(R4),R14
   541  		//	MOVD  R14,n(R3)
   542  		//	MOVW  n1(R4),R14
   543  		//	MOVW  R14,n1(R3)
   544  		//	MOVH  n2(R4),R14
   545  		//	MOVH  R14,n2(R3)
   546  		//	MOVB  n3(R4),R14
   547  		//	MOVB  R14,n3(R3)
   548  
   549  		{
   550  			name:      "LoweredMove",
   551  			aux:       "Int64",
   552  			argLength: 3,
   553  			reg: regInfo{
   554  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   555  				clobbers: buildReg("R20 R21"),
   556  			},
   557  			clobberFlags:   true,
   558  			typ:            "Mem",
   559  			faultOnNilArg0: true,
   560  			faultOnNilArg1: true,
   561  			unsafePoint:    true,
   562  		},
   563  		{
   564  			name:      "LoweredMoveShort",
   565  			aux:       "Int64",
   566  			argLength: 3,
   567  			reg: regInfo{
   568  				inputs: []regMask{gp, gp},
   569  			},
   570  			typ:            "Mem",
   571  			faultOnNilArg0: true,
   572  			faultOnNilArg1: true,
   573  			unsafePoint:    true,
   574  		},
   575  
   576  		// The following is similar to the LoweredMove, but uses
   577  		// LXV instead of LXVD2X, which does not require an index
   578  		// register and will do 4 in a loop instead of only 2.
   579  		{
   580  			name:      "LoweredQuadMove",
   581  			aux:       "Int64",
   582  			argLength: 3,
   583  			reg: regInfo{
   584  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   585  				clobbers: buildReg("R20 R21"),
   586  			},
   587  			clobberFlags:   true,
   588  			typ:            "Mem",
   589  			faultOnNilArg0: true,
   590  			faultOnNilArg1: true,
   591  			unsafePoint:    true,
   592  		},
   593  
   594  		{
   595  			name:      "LoweredQuadMoveShort",
   596  			aux:       "Int64",
   597  			argLength: 3,
   598  			reg: regInfo{
   599  				inputs: []regMask{gp, gp},
   600  			},
   601  			typ:            "Mem",
   602  			faultOnNilArg0: true,
   603  			faultOnNilArg1: true,
   604  			unsafePoint:    true,
   605  		},
   606  
   607  		{name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   608  		{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   609  		{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   610  
   611  		{name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   612  		{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   613  		{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   614  		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   615  
   616  		// atomic add32, 64
   617  		// LWSYNC
   618  		// LDAR         (Rarg0), Rout
   619  		// ADD		Rarg1, Rout
   620  		// STDCCC       Rout, (Rarg0)
   621  		// BNE          -3(PC)
   622  		// return new sum
   623  		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   624  		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   625  
   626  		// atomic exchange32, 64
   627  		// LWSYNC
   628  		// LDAR         (Rarg0), Rout
   629  		// STDCCC       Rarg1, (Rarg0)
   630  		// BNE          -2(PC)
   631  		// ISYNC
   632  		// return old val
   633  		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   634  		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   635  
   636  		// atomic compare and swap.
   637  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   638  		// if *arg0 == arg1 {
   639  		//   *arg0 = arg2
   640  		//   return (true, memory)
   641  		// } else {
   642  		//   return (false, memory)
   643  		// }
   644  		// SYNC
   645  		// LDAR		(Rarg0), Rtmp
   646  		// CMP		Rarg1, Rtmp
   647  		// BNE		3(PC)
   648  		// STDCCC	Rarg2, (Rarg0)
   649  		// BNE		-4(PC)
   650  		// NOTE(review): CBNZ/CSET below are ARM64 mnemonics, apparently a stale
   650  		// copy — PPC64 presumably derives the boolean result from the CR EQ bit;
   650  		// confirm against the lowering in ../../ppc64/ssa.go.
   650  		// CBNZ         Rtmp, -4(PC)
   651  		// CSET         EQ, Rout
   652  		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   653  		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   654  
   655  		// atomic 8/32 and/or.
   656  		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
   657  		// LBAR/LWAT	(Rarg0), Rtmp
   658  		// AND/OR	Rarg1, Rtmp
   659  		// STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
   660  		// BNE		Rtmp, -3(PC)
   661  		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   662  		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   663  		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   664  		{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   665  
   666  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
   667  		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and its arguments R20 and R21,
   668  		// but may clobber anything else, including R31 (REGTMP).
   669  		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
   670  
   671  		// There are three of these functions so that they can have three different register inputs.
   672  		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
   673  		// default registers to match so we don't need to copy registers around unnecessarily.
   674  		{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r6}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   675  		{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r5}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   676  		{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   677  
   678  		// (InvertFlags (CMP a b)) == (CMP b a)
   679  		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   680  		// then we do (LessThan (InvertFlags (CMP b a))) instead.
   681  		// Rewrites will convert this to (GreaterThan (CMP b a)).
   682  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   683  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   684  
   685  		// Constant flag values. For any comparison, there are 3 possible
   686  		// outcomes: either the three from the signed total order (<,==,>)
   687  		// or the three from the unsigned total order, depending on which
   688  		// comparison operation was used (CMP or CMPU -- PPC is different from
   689  		// the other architectures, which have a single comparison producing
   690  		// both signed and unsigned comparison results.)
   691  
   692  		// These ops are for temporary use by rewrite rules. They
   693  		// cannot appear in the generated assembly.
   694  		{name: "FlagEQ"}, // equal
   695  		{name: "FlagLT"}, // signed < or unsigned <
   696  		{name: "FlagGT"}, // signed > or unsigned >
   697  	}
   698  
   699  	blocks := []blockData{
   700  		{name: "EQ", controls: 1},
   701  		{name: "NE", controls: 1},
   702  		{name: "LT", controls: 1},
   703  		{name: "LE", controls: 1},
   704  		{name: "GT", controls: 1},
   705  		{name: "GE", controls: 1},
   706  		{name: "FLT", controls: 1},
   707  		{name: "FLE", controls: 1},
   708  		{name: "FGT", controls: 1},
   709  		{name: "FGE", controls: 1},
   710  	}
   711  
   712  	archs = append(archs, arch{
   713  		name:               "PPC64",
   714  		pkg:                "cmd/internal/obj/ppc64",
   715  		genfile:            "../../ppc64/ssa.go",
   716  		ops:                ops,
   717  		blocks:             blocks,
   718  		regnames:           regNamesPPC64,
   719  		ParamIntRegNames:   "R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17",
   720  		ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12",
   721  		gpregmask:          gp,
   722  		fpregmask:          fp,
   723  		framepointerreg:    -1,
   724  		linkreg:            -1, // not used
   725  	})
   726  }
   727  

View as plain text