Source file src/cmd/compile/internal/arm64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/arm64"
    19  )
    20  
    21  // loadByType returns the load instruction of the given type.
    22  func loadByType(t *types.Type) obj.As {
    23  	if t.IsFloat() {
    24  		switch t.Size() {
    25  		case 4:
    26  			return arm64.AFMOVS
    27  		case 8:
    28  			return arm64.AFMOVD
    29  		}
    30  	} else {
    31  		switch t.Size() {
    32  		case 1:
    33  			if t.IsSigned() {
    34  				return arm64.AMOVB
    35  			} else {
    36  				return arm64.AMOVBU
    37  			}
    38  		case 2:
    39  			if t.IsSigned() {
    40  				return arm64.AMOVH
    41  			} else {
    42  				return arm64.AMOVHU
    43  			}
    44  		case 4:
    45  			if t.IsSigned() {
    46  				return arm64.AMOVW
    47  			} else {
    48  				return arm64.AMOVWU
    49  			}
    50  		case 8:
    51  			return arm64.AMOVD
    52  		}
    53  	}
    54  	panic("bad load type")
    55  }
    56  
    57  // storeByType returns the store instruction of the given type.
    58  func storeByType(t *types.Type) obj.As {
    59  	if t.IsFloat() {
    60  		switch t.Size() {
    61  		case 4:
    62  			return arm64.AFMOVS
    63  		case 8:
    64  			return arm64.AFMOVD
    65  		}
    66  	} else {
    67  		switch t.Size() {
    68  		case 1:
    69  			return arm64.AMOVB
    70  		case 2:
    71  			return arm64.AMOVH
    72  		case 4:
    73  			return arm64.AMOVW
    74  		case 8:
    75  			return arm64.AMOVD
    76  		}
    77  	}
    78  	panic("bad store type")
    79  }
    80  
     81  // makeshift encodes a register shifted by a constant, for use as an Offset in a Prog.
    82  func makeshift(v *ssa.Value, reg int16, typ int64, s int64) int64 {
    83  	if s < 0 || s >= 64 {
    84  		v.Fatalf("shift out of range: %d", s)
    85  	}
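         	// The result is decoded by the assembler from obj.TYPE_SHIFT operands:
         	// (reg&31)<<16 puts the register number in bits 16-20, typ (an
         	// arm64.SHIFT_* constant, already at its final bit position) is or'ed in
         	// as-is, and (s&63)<<10 puts the shift amount in bits 10-15.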
    86  	return int64(reg&31)<<16 | typ | (s&63)<<10
    87  }
    88  
    89  // genshift generates a Prog for r = r0 op (r1 shifted by n)
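         // If r is 0, the destination is omitted, which is how the flag-setting
         // compare forms (CMPshift*, CMNshift*, TSTshift*) are generated; r0 is 0
         // for the unary MVNshift*/NEGshift* forms.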
    90  func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int64, n int64) *obj.Prog {
    91  	p := s.Prog(as)
    92  	p.From.Type = obj.TYPE_SHIFT
    93  	p.From.Offset = makeshift(v, r1, typ, n)
    94  	p.Reg = r0
    95  	if r != 0 {
    96  		p.To.Type = obj.TYPE_REG
    97  		p.To.Reg = r
    98  	}
    99  	return p
   100  }
   101  
    102  // genIndexedOperand generates the memory operand for the indexed load/store instructions.
   103  func genIndexedOperand(v *ssa.Value) obj.Addr {
   104  	// Reg: base register, Index: (shifted) index register
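         	// For the fixed-scale ops below, Index encodes an LSL-extended register:
         	// the index register number in the low five bits and the scale (log2 of
         	// the element size: 3, 2 or 1) at bit 5, or'ed with arm64.REG_LSL.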
   105  	mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   106  	switch v.Op {
   107  	case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
   108  		ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
   109  		mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
   110  	case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
   111  		ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
   112  		mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
   113  	case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
   114  		mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
   115  	default: // not shifted
   116  		mop.Index = v.Args[1].Reg()
   117  	}
   118  	return mop
   119  }
   120  
   121  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   122  	switch v.Op {
   123  	case ssa.OpCopy, ssa.OpARM64MOVDreg:
   124  		if v.Type.IsMemory() {
   125  			return
   126  		}
   127  		x := v.Args[0].Reg()
   128  		y := v.Reg()
   129  		if x == y {
   130  			return
   131  		}
   132  		as := arm64.AMOVD
   133  		if v.Type.IsFloat() {
   134  			switch v.Type.Size() {
   135  			case 4:
   136  				as = arm64.AFMOVS
   137  			case 8:
   138  				as = arm64.AFMOVD
   139  			default:
   140  				panic("bad float size")
   141  			}
   142  		}
   143  		p := s.Prog(as)
   144  		p.From.Type = obj.TYPE_REG
   145  		p.From.Reg = x
   146  		p.To.Type = obj.TYPE_REG
   147  		p.To.Reg = y
   148  	case ssa.OpARM64MOVDnop:
   149  		// nothing to do
   150  	case ssa.OpLoadReg:
   151  		if v.Type.IsFlags() {
   152  			v.Fatalf("load flags not implemented: %v", v.LongString())
   153  			return
   154  		}
   155  		p := s.Prog(loadByType(v.Type))
   156  		ssagen.AddrAuto(&p.From, v.Args[0])
   157  		p.To.Type = obj.TYPE_REG
   158  		p.To.Reg = v.Reg()
   159  	case ssa.OpStoreReg:
   160  		if v.Type.IsFlags() {
   161  			v.Fatalf("store flags not implemented: %v", v.LongString())
   162  			return
   163  		}
   164  		p := s.Prog(storeByType(v.Type))
   165  		p.From.Type = obj.TYPE_REG
   166  		p.From.Reg = v.Args[0].Reg()
   167  		ssagen.AddrAuto(&p.To, v)
   168  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
    169  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill.
    170  		// The loop below does real work only once per function: RegArgs is cleared after the spill info is recorded.
   171  		for _, a := range v.Block.Func.RegArgs {
   172  			// Pass the spill/unspill information along to the assembler, offset by size of
   173  			// the saved LR slot.
   174  			addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.FixedFrameSize())
   175  			s.FuncInfo().AddSpill(
   176  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   177  		}
   178  		v.Block.Func.RegArgs = nil
   179  		ssagen.CheckArgReg(v)
   180  	case ssa.OpARM64ADD,
   181  		ssa.OpARM64SUB,
   182  		ssa.OpARM64AND,
   183  		ssa.OpARM64OR,
   184  		ssa.OpARM64XOR,
   185  		ssa.OpARM64BIC,
   186  		ssa.OpARM64EON,
   187  		ssa.OpARM64ORN,
   188  		ssa.OpARM64MUL,
   189  		ssa.OpARM64MULW,
   190  		ssa.OpARM64MNEG,
   191  		ssa.OpARM64MNEGW,
   192  		ssa.OpARM64MULH,
   193  		ssa.OpARM64UMULH,
   194  		ssa.OpARM64MULL,
   195  		ssa.OpARM64UMULL,
   196  		ssa.OpARM64DIV,
   197  		ssa.OpARM64UDIV,
   198  		ssa.OpARM64DIVW,
   199  		ssa.OpARM64UDIVW,
   200  		ssa.OpARM64MOD,
   201  		ssa.OpARM64UMOD,
   202  		ssa.OpARM64MODW,
   203  		ssa.OpARM64UMODW,
   204  		ssa.OpARM64SLL,
   205  		ssa.OpARM64SRL,
   206  		ssa.OpARM64SRA,
   207  		ssa.OpARM64FADDS,
   208  		ssa.OpARM64FADDD,
   209  		ssa.OpARM64FSUBS,
   210  		ssa.OpARM64FSUBD,
   211  		ssa.OpARM64FMULS,
   212  		ssa.OpARM64FMULD,
   213  		ssa.OpARM64FNMULS,
   214  		ssa.OpARM64FNMULD,
   215  		ssa.OpARM64FDIVS,
   216  		ssa.OpARM64FDIVD,
   217  		ssa.OpARM64ROR,
   218  		ssa.OpARM64RORW:
   219  		r := v.Reg()
   220  		r1 := v.Args[0].Reg()
   221  		r2 := v.Args[1].Reg()
   222  		p := s.Prog(v.Op.Asm())
   223  		p.From.Type = obj.TYPE_REG
   224  		p.From.Reg = r2
   225  		p.Reg = r1
   226  		p.To.Type = obj.TYPE_REG
   227  		p.To.Reg = r
   228  	case ssa.OpARM64FMADDS,
   229  		ssa.OpARM64FMADDD,
   230  		ssa.OpARM64FNMADDS,
   231  		ssa.OpARM64FNMADDD,
   232  		ssa.OpARM64FMSUBS,
   233  		ssa.OpARM64FMSUBD,
   234  		ssa.OpARM64FNMSUBS,
   235  		ssa.OpARM64FNMSUBD,
   236  		ssa.OpARM64MADD,
   237  		ssa.OpARM64MADDW,
   238  		ssa.OpARM64MSUB,
   239  		ssa.OpARM64MSUBW:
   240  		rt := v.Reg()
   241  		ra := v.Args[0].Reg()
   242  		rm := v.Args[1].Reg()
   243  		rn := v.Args[2].Reg()
   244  		p := s.Prog(v.Op.Asm())
   245  		p.Reg = ra
   246  		p.From.Type = obj.TYPE_REG
   247  		p.From.Reg = rm
   248  		p.SetFrom3Reg(rn)
   249  		p.To.Type = obj.TYPE_REG
   250  		p.To.Reg = rt
   251  	case ssa.OpARM64ADDconst,
   252  		ssa.OpARM64SUBconst,
   253  		ssa.OpARM64ANDconst,
   254  		ssa.OpARM64ORconst,
   255  		ssa.OpARM64XORconst,
   256  		ssa.OpARM64SLLconst,
   257  		ssa.OpARM64SRLconst,
   258  		ssa.OpARM64SRAconst,
   259  		ssa.OpARM64RORconst,
   260  		ssa.OpARM64RORWconst:
   261  		p := s.Prog(v.Op.Asm())
   262  		p.From.Type = obj.TYPE_CONST
   263  		p.From.Offset = v.AuxInt
   264  		p.Reg = v.Args[0].Reg()
   265  		p.To.Type = obj.TYPE_REG
   266  		p.To.Reg = v.Reg()
   267  	case ssa.OpARM64ADDSconstflags:
   268  		p := s.Prog(v.Op.Asm())
   269  		p.From.Type = obj.TYPE_CONST
   270  		p.From.Offset = v.AuxInt
   271  		p.Reg = v.Args[0].Reg()
   272  		p.To.Type = obj.TYPE_REG
   273  		p.To.Reg = v.Reg0()
   274  	case ssa.OpARM64ADCzerocarry:
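         		// ADC ZR, ZR, Rout: Rout = 0 + 0 + carry flag, materializing the
         		// carry into a register as 0 or 1.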
   275  		p := s.Prog(v.Op.Asm())
   276  		p.From.Type = obj.TYPE_REG
   277  		p.From.Reg = arm64.REGZERO
   278  		p.Reg = arm64.REGZERO
   279  		p.To.Type = obj.TYPE_REG
   280  		p.To.Reg = v.Reg()
   281  	case ssa.OpARM64ADCSflags,
   282  		ssa.OpARM64ADDSflags,
   283  		ssa.OpARM64SBCSflags,
   284  		ssa.OpARM64SUBSflags:
   285  		r := v.Reg0()
   286  		r1 := v.Args[0].Reg()
   287  		r2 := v.Args[1].Reg()
   288  		p := s.Prog(v.Op.Asm())
   289  		p.From.Type = obj.TYPE_REG
   290  		p.From.Reg = r2
   291  		p.Reg = r1
   292  		p.To.Type = obj.TYPE_REG
   293  		p.To.Reg = r
   294  	case ssa.OpARM64NEGSflags:
   295  		p := s.Prog(v.Op.Asm())
   296  		p.From.Type = obj.TYPE_REG
   297  		p.From.Reg = v.Args[0].Reg()
   298  		p.To.Type = obj.TYPE_REG
   299  		p.To.Reg = v.Reg0()
   300  	case ssa.OpARM64NGCzerocarry:
   301  		p := s.Prog(v.Op.Asm())
   302  		p.From.Type = obj.TYPE_REG
   303  		p.From.Reg = arm64.REGZERO
   304  		p.To.Type = obj.TYPE_REG
   305  		p.To.Reg = v.Reg()
   306  	case ssa.OpARM64EXTRconst,
   307  		ssa.OpARM64EXTRWconst:
   308  		p := s.Prog(v.Op.Asm())
   309  		p.From.Type = obj.TYPE_CONST
   310  		p.From.Offset = v.AuxInt
   311  		p.SetFrom3Reg(v.Args[0].Reg())
   312  		p.Reg = v.Args[1].Reg()
   313  		p.To.Type = obj.TYPE_REG
   314  		p.To.Reg = v.Reg()
   315  	case ssa.OpARM64MVNshiftLL, ssa.OpARM64NEGshiftLL:
   316  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   317  	case ssa.OpARM64MVNshiftRL, ssa.OpARM64NEGshiftRL:
   318  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   319  	case ssa.OpARM64MVNshiftRA, ssa.OpARM64NEGshiftRA:
   320  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   321  	case ssa.OpARM64MVNshiftRO:
   322  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   323  	case ssa.OpARM64ADDshiftLL,
   324  		ssa.OpARM64SUBshiftLL,
   325  		ssa.OpARM64ANDshiftLL,
   326  		ssa.OpARM64ORshiftLL,
   327  		ssa.OpARM64XORshiftLL,
   328  		ssa.OpARM64EONshiftLL,
   329  		ssa.OpARM64ORNshiftLL,
   330  		ssa.OpARM64BICshiftLL:
   331  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   332  	case ssa.OpARM64ADDshiftRL,
   333  		ssa.OpARM64SUBshiftRL,
   334  		ssa.OpARM64ANDshiftRL,
   335  		ssa.OpARM64ORshiftRL,
   336  		ssa.OpARM64XORshiftRL,
   337  		ssa.OpARM64EONshiftRL,
   338  		ssa.OpARM64ORNshiftRL,
   339  		ssa.OpARM64BICshiftRL:
   340  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   341  	case ssa.OpARM64ADDshiftRA,
   342  		ssa.OpARM64SUBshiftRA,
   343  		ssa.OpARM64ANDshiftRA,
   344  		ssa.OpARM64ORshiftRA,
   345  		ssa.OpARM64XORshiftRA,
   346  		ssa.OpARM64EONshiftRA,
   347  		ssa.OpARM64ORNshiftRA,
   348  		ssa.OpARM64BICshiftRA:
   349  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   350  	case ssa.OpARM64ANDshiftRO,
   351  		ssa.OpARM64ORshiftRO,
   352  		ssa.OpARM64XORshiftRO,
   353  		ssa.OpARM64EONshiftRO,
   354  		ssa.OpARM64ORNshiftRO,
   355  		ssa.OpARM64BICshiftRO:
   356  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   357  	case ssa.OpARM64MOVDconst:
   358  		p := s.Prog(v.Op.Asm())
   359  		p.From.Type = obj.TYPE_CONST
   360  		p.From.Offset = v.AuxInt
   361  		p.To.Type = obj.TYPE_REG
   362  		p.To.Reg = v.Reg()
   363  	case ssa.OpARM64FMOVSconst,
   364  		ssa.OpARM64FMOVDconst:
   365  		p := s.Prog(v.Op.Asm())
   366  		p.From.Type = obj.TYPE_FCONST
   367  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   368  		p.To.Type = obj.TYPE_REG
   369  		p.To.Reg = v.Reg()
   370  	case ssa.OpARM64FCMPS0,
   371  		ssa.OpARM64FCMPD0:
   372  		p := s.Prog(v.Op.Asm())
   373  		p.From.Type = obj.TYPE_FCONST
   374  		p.From.Val = math.Float64frombits(0)
   375  		p.Reg = v.Args[0].Reg()
   376  	case ssa.OpARM64CMP,
   377  		ssa.OpARM64CMPW,
   378  		ssa.OpARM64CMN,
   379  		ssa.OpARM64CMNW,
   380  		ssa.OpARM64TST,
   381  		ssa.OpARM64TSTW,
   382  		ssa.OpARM64FCMPS,
   383  		ssa.OpARM64FCMPD:
   384  		p := s.Prog(v.Op.Asm())
   385  		p.From.Type = obj.TYPE_REG
   386  		p.From.Reg = v.Args[1].Reg()
   387  		p.Reg = v.Args[0].Reg()
   388  	case ssa.OpARM64CMPconst,
   389  		ssa.OpARM64CMPWconst,
   390  		ssa.OpARM64CMNconst,
   391  		ssa.OpARM64CMNWconst,
   392  		ssa.OpARM64TSTconst,
   393  		ssa.OpARM64TSTWconst:
   394  		p := s.Prog(v.Op.Asm())
   395  		p.From.Type = obj.TYPE_CONST
   396  		p.From.Offset = v.AuxInt
   397  		p.Reg = v.Args[0].Reg()
   398  	case ssa.OpARM64CMPshiftLL, ssa.OpARM64CMNshiftLL, ssa.OpARM64TSTshiftLL:
   399  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt)
   400  	case ssa.OpARM64CMPshiftRL, ssa.OpARM64CMNshiftRL, ssa.OpARM64TSTshiftRL:
   401  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt)
   402  	case ssa.OpARM64CMPshiftRA, ssa.OpARM64CMNshiftRA, ssa.OpARM64TSTshiftRA:
   403  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt)
   404  	case ssa.OpARM64TSTshiftRO:
   405  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_ROR, v.AuxInt)
   406  	case ssa.OpARM64MOVDaddr:
   407  		p := s.Prog(arm64.AMOVD)
   408  		p.From.Type = obj.TYPE_ADDR
   409  		p.From.Reg = v.Args[0].Reg()
   410  		p.To.Type = obj.TYPE_REG
   411  		p.To.Reg = v.Reg()
   412  
   413  		var wantreg string
   414  		// MOVD $sym+off(base), R
   415  		// the assembler expands it as the following:
    416  		// - base is SP: add constant offset to SP
    417  		//               when constant is large, a temporary register may be used
   418  		// - base is SB: load external address from constant pool (use relocation)
   419  		switch v.Aux.(type) {
   420  		default:
   421  			v.Fatalf("aux is of unknown type %T", v.Aux)
   422  		case *obj.LSym:
   423  			wantreg = "SB"
   424  			ssagen.AddAux(&p.From, v)
   425  		case *ir.Name:
   426  			wantreg = "SP"
   427  			ssagen.AddAux(&p.From, v)
   428  		case nil:
   429  			// No sym, just MOVD $off(SP), R
   430  			wantreg = "SP"
   431  			p.From.Offset = v.AuxInt
   432  		}
   433  		if reg := v.Args[0].RegName(); reg != wantreg {
   434  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   435  		}
   436  	case ssa.OpARM64MOVBload,
   437  		ssa.OpARM64MOVBUload,
   438  		ssa.OpARM64MOVHload,
   439  		ssa.OpARM64MOVHUload,
   440  		ssa.OpARM64MOVWload,
   441  		ssa.OpARM64MOVWUload,
   442  		ssa.OpARM64MOVDload,
   443  		ssa.OpARM64FMOVSload,
   444  		ssa.OpARM64FMOVDload:
   445  		p := s.Prog(v.Op.Asm())
   446  		p.From.Type = obj.TYPE_MEM
   447  		p.From.Reg = v.Args[0].Reg()
   448  		ssagen.AddAux(&p.From, v)
   449  		p.To.Type = obj.TYPE_REG
   450  		p.To.Reg = v.Reg()
   451  	case ssa.OpARM64MOVBloadidx,
   452  		ssa.OpARM64MOVBUloadidx,
   453  		ssa.OpARM64MOVHloadidx,
   454  		ssa.OpARM64MOVHUloadidx,
   455  		ssa.OpARM64MOVWloadidx,
   456  		ssa.OpARM64MOVWUloadidx,
   457  		ssa.OpARM64MOVDloadidx,
   458  		ssa.OpARM64FMOVSloadidx,
   459  		ssa.OpARM64FMOVDloadidx,
   460  		ssa.OpARM64MOVHloadidx2,
   461  		ssa.OpARM64MOVHUloadidx2,
   462  		ssa.OpARM64MOVWloadidx4,
   463  		ssa.OpARM64MOVWUloadidx4,
   464  		ssa.OpARM64MOVDloadidx8,
   465  		ssa.OpARM64FMOVDloadidx8,
   466  		ssa.OpARM64FMOVSloadidx4:
   467  		p := s.Prog(v.Op.Asm())
   468  		p.From = genIndexedOperand(v)
   469  		p.To.Type = obj.TYPE_REG
   470  		p.To.Reg = v.Reg()
   471  	case ssa.OpARM64LDAR,
   472  		ssa.OpARM64LDARB,
   473  		ssa.OpARM64LDARW:
   474  		p := s.Prog(v.Op.Asm())
   475  		p.From.Type = obj.TYPE_MEM
   476  		p.From.Reg = v.Args[0].Reg()
   477  		ssagen.AddAux(&p.From, v)
   478  		p.To.Type = obj.TYPE_REG
   479  		p.To.Reg = v.Reg0()
   480  	case ssa.OpARM64MOVBstore,
   481  		ssa.OpARM64MOVHstore,
   482  		ssa.OpARM64MOVWstore,
   483  		ssa.OpARM64MOVDstore,
   484  		ssa.OpARM64FMOVSstore,
   485  		ssa.OpARM64FMOVDstore,
   486  		ssa.OpARM64STLRB,
   487  		ssa.OpARM64STLR,
   488  		ssa.OpARM64STLRW:
   489  		p := s.Prog(v.Op.Asm())
   490  		p.From.Type = obj.TYPE_REG
   491  		p.From.Reg = v.Args[1].Reg()
   492  		p.To.Type = obj.TYPE_MEM
   493  		p.To.Reg = v.Args[0].Reg()
   494  		ssagen.AddAux(&p.To, v)
   495  	case ssa.OpARM64MOVBstoreidx,
   496  		ssa.OpARM64MOVHstoreidx,
   497  		ssa.OpARM64MOVWstoreidx,
   498  		ssa.OpARM64MOVDstoreidx,
   499  		ssa.OpARM64FMOVSstoreidx,
   500  		ssa.OpARM64FMOVDstoreidx,
   501  		ssa.OpARM64MOVHstoreidx2,
   502  		ssa.OpARM64MOVWstoreidx4,
   503  		ssa.OpARM64FMOVSstoreidx4,
   504  		ssa.OpARM64MOVDstoreidx8,
   505  		ssa.OpARM64FMOVDstoreidx8:
   506  		p := s.Prog(v.Op.Asm())
   507  		p.To = genIndexedOperand(v)
   508  		p.From.Type = obj.TYPE_REG
   509  		p.From.Reg = v.Args[2].Reg()
   510  	case ssa.OpARM64STP:
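         		// STP stores a register pair. obj.Addr has a single register field,
         		// so with TYPE_REGREG the second register of the pair travels in Offset.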
   511  		p := s.Prog(v.Op.Asm())
   512  		p.From.Type = obj.TYPE_REGREG
   513  		p.From.Reg = v.Args[1].Reg()
   514  		p.From.Offset = int64(v.Args[2].Reg())
   515  		p.To.Type = obj.TYPE_MEM
   516  		p.To.Reg = v.Args[0].Reg()
   517  		ssagen.AddAux(&p.To, v)
   518  	case ssa.OpARM64MOVBstorezero,
   519  		ssa.OpARM64MOVHstorezero,
   520  		ssa.OpARM64MOVWstorezero,
   521  		ssa.OpARM64MOVDstorezero:
   522  		p := s.Prog(v.Op.Asm())
   523  		p.From.Type = obj.TYPE_REG
   524  		p.From.Reg = arm64.REGZERO
   525  		p.To.Type = obj.TYPE_MEM
   526  		p.To.Reg = v.Args[0].Reg()
   527  		ssagen.AddAux(&p.To, v)
   528  	case ssa.OpARM64MOVBstorezeroidx,
   529  		ssa.OpARM64MOVHstorezeroidx,
   530  		ssa.OpARM64MOVWstorezeroidx,
   531  		ssa.OpARM64MOVDstorezeroidx,
   532  		ssa.OpARM64MOVHstorezeroidx2,
   533  		ssa.OpARM64MOVWstorezeroidx4,
   534  		ssa.OpARM64MOVDstorezeroidx8:
   535  		p := s.Prog(v.Op.Asm())
   536  		p.To = genIndexedOperand(v)
   537  		p.From.Type = obj.TYPE_REG
   538  		p.From.Reg = arm64.REGZERO
   539  	case ssa.OpARM64MOVQstorezero:
   540  		p := s.Prog(v.Op.Asm())
   541  		p.From.Type = obj.TYPE_REGREG
   542  		p.From.Reg = arm64.REGZERO
   543  		p.From.Offset = int64(arm64.REGZERO)
   544  		p.To.Type = obj.TYPE_MEM
   545  		p.To.Reg = v.Args[0].Reg()
   546  		ssagen.AddAux(&p.To, v)
   547  	case ssa.OpARM64BFI,
   548  		ssa.OpARM64BFXIL:
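         		// AuxInt packs both immediates: the bitfield lsb in the bits above 8
         		// and the bitfield width in the low 8 bits.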
   549  		p := s.Prog(v.Op.Asm())
   550  		p.From.Type = obj.TYPE_CONST
   551  		p.From.Offset = v.AuxInt >> 8
   552  		p.SetFrom3Const(v.AuxInt & 0xff)
   553  		p.Reg = v.Args[1].Reg()
   554  		p.To.Type = obj.TYPE_REG
   555  		p.To.Reg = v.Reg()
   556  	case ssa.OpARM64SBFIZ,
   557  		ssa.OpARM64SBFX,
   558  		ssa.OpARM64UBFIZ,
   559  		ssa.OpARM64UBFX:
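         		// Same AuxInt packing as BFI/BFXIL above: lsb in the high bits,
         		// width in the low 8 bits.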
   560  		p := s.Prog(v.Op.Asm())
   561  		p.From.Type = obj.TYPE_CONST
   562  		p.From.Offset = v.AuxInt >> 8
   563  		p.SetFrom3Const(v.AuxInt & 0xff)
   564  		p.Reg = v.Args[0].Reg()
   565  		p.To.Type = obj.TYPE_REG
   566  		p.To.Reg = v.Reg()
   567  	case ssa.OpARM64LoweredMuluhilo:
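         		// 128-bit multiply: UMULH puts the high 64 bits of arg0*arg1 in Reg0,
         		// MUL puts the low 64 bits in Reg1.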
   568  		r0 := v.Args[0].Reg()
   569  		r1 := v.Args[1].Reg()
   570  		p := s.Prog(arm64.AUMULH)
   571  		p.From.Type = obj.TYPE_REG
   572  		p.From.Reg = r1
   573  		p.Reg = r0
   574  		p.To.Type = obj.TYPE_REG
   575  		p.To.Reg = v.Reg0()
   576  		p1 := s.Prog(arm64.AMUL)
   577  		p1.From.Type = obj.TYPE_REG
   578  		p1.From.Reg = r1
   579  		p1.Reg = r0
   580  		p1.To.Type = obj.TYPE_REG
   581  		p1.To.Reg = v.Reg1()
   582  	case ssa.OpARM64LoweredAtomicExchange64,
   583  		ssa.OpARM64LoweredAtomicExchange32:
   584  		// LDAXR	(Rarg0), Rout
   585  		// STLXR	Rarg1, (Rarg0), Rtmp
   586  		// CBNZ		Rtmp, -2(PC)
   587  		ld := arm64.ALDAXR
   588  		st := arm64.ASTLXR
   589  		if v.Op == ssa.OpARM64LoweredAtomicExchange32 {
   590  			ld = arm64.ALDAXRW
   591  			st = arm64.ASTLXRW
   592  		}
   593  		r0 := v.Args[0].Reg()
   594  		r1 := v.Args[1].Reg()
   595  		out := v.Reg0()
   596  		p := s.Prog(ld)
   597  		p.From.Type = obj.TYPE_MEM
   598  		p.From.Reg = r0
   599  		p.To.Type = obj.TYPE_REG
   600  		p.To.Reg = out
   601  		p1 := s.Prog(st)
   602  		p1.From.Type = obj.TYPE_REG
   603  		p1.From.Reg = r1
   604  		p1.To.Type = obj.TYPE_MEM
   605  		p1.To.Reg = r0
   606  		p1.RegTo2 = arm64.REGTMP
   607  		p2 := s.Prog(arm64.ACBNZ)
   608  		p2.From.Type = obj.TYPE_REG
   609  		p2.From.Reg = arm64.REGTMP
   610  		p2.To.Type = obj.TYPE_BRANCH
   611  		p2.To.SetTarget(p)
   612  	case ssa.OpARM64LoweredAtomicExchange64Variant,
   613  		ssa.OpARM64LoweredAtomicExchange32Variant:
   614  		swap := arm64.ASWPALD
   615  		if v.Op == ssa.OpARM64LoweredAtomicExchange32Variant {
   616  			swap = arm64.ASWPALW
   617  		}
   618  		r0 := v.Args[0].Reg()
   619  		r1 := v.Args[1].Reg()
   620  		out := v.Reg0()
   621  
   622  		// SWPALD	Rarg1, (Rarg0), Rout
   623  		p := s.Prog(swap)
   624  		p.From.Type = obj.TYPE_REG
   625  		p.From.Reg = r1
   626  		p.To.Type = obj.TYPE_MEM
   627  		p.To.Reg = r0
   628  		p.RegTo2 = out
   629  
   630  	case ssa.OpARM64LoweredAtomicAdd64,
   631  		ssa.OpARM64LoweredAtomicAdd32:
   632  		// LDAXR	(Rarg0), Rout
   633  		// ADD		Rarg1, Rout
   634  		// STLXR	Rout, (Rarg0), Rtmp
   635  		// CBNZ		Rtmp, -3(PC)
   636  		ld := arm64.ALDAXR
   637  		st := arm64.ASTLXR
   638  		if v.Op == ssa.OpARM64LoweredAtomicAdd32 {
   639  			ld = arm64.ALDAXRW
   640  			st = arm64.ASTLXRW
   641  		}
   642  		r0 := v.Args[0].Reg()
   643  		r1 := v.Args[1].Reg()
   644  		out := v.Reg0()
   645  		p := s.Prog(ld)
   646  		p.From.Type = obj.TYPE_MEM
   647  		p.From.Reg = r0
   648  		p.To.Type = obj.TYPE_REG
   649  		p.To.Reg = out
   650  		p1 := s.Prog(arm64.AADD)
   651  		p1.From.Type = obj.TYPE_REG
   652  		p1.From.Reg = r1
   653  		p1.To.Type = obj.TYPE_REG
   654  		p1.To.Reg = out
   655  		p2 := s.Prog(st)
   656  		p2.From.Type = obj.TYPE_REG
   657  		p2.From.Reg = out
   658  		p2.To.Type = obj.TYPE_MEM
   659  		p2.To.Reg = r0
   660  		p2.RegTo2 = arm64.REGTMP
   661  		p3 := s.Prog(arm64.ACBNZ)
   662  		p3.From.Type = obj.TYPE_REG
   663  		p3.From.Reg = arm64.REGTMP
   664  		p3.To.Type = obj.TYPE_BRANCH
   665  		p3.To.SetTarget(p)
   666  	case ssa.OpARM64LoweredAtomicAdd64Variant,
   667  		ssa.OpARM64LoweredAtomicAdd32Variant:
   668  		// LDADDAL	Rarg1, (Rarg0), Rout
   669  		// ADD		Rarg1, Rout
   670  		op := arm64.ALDADDALD
   671  		if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
   672  			op = arm64.ALDADDALW
   673  		}
   674  		r0 := v.Args[0].Reg()
   675  		r1 := v.Args[1].Reg()
   676  		out := v.Reg0()
   677  		p := s.Prog(op)
   678  		p.From.Type = obj.TYPE_REG
   679  		p.From.Reg = r1
   680  		p.To.Type = obj.TYPE_MEM
   681  		p.To.Reg = r0
   682  		p.RegTo2 = out
   683  		p1 := s.Prog(arm64.AADD)
   684  		p1.From.Type = obj.TYPE_REG
   685  		p1.From.Reg = r1
   686  		p1.To.Type = obj.TYPE_REG
   687  		p1.To.Reg = out
   688  	case ssa.OpARM64LoweredAtomicCas64,
   689  		ssa.OpARM64LoweredAtomicCas32:
   690  		// LDAXR	(Rarg0), Rtmp
   691  		// CMP		Rarg1, Rtmp
   692  		// BNE		3(PC)
   693  		// STLXR	Rarg2, (Rarg0), Rtmp
   694  		// CBNZ		Rtmp, -4(PC)
   695  		// CSET		EQ, Rout
   696  		ld := arm64.ALDAXR
   697  		st := arm64.ASTLXR
   698  		cmp := arm64.ACMP
   699  		if v.Op == ssa.OpARM64LoweredAtomicCas32 {
   700  			ld = arm64.ALDAXRW
   701  			st = arm64.ASTLXRW
   702  			cmp = arm64.ACMPW
   703  		}
   704  		r0 := v.Args[0].Reg()
   705  		r1 := v.Args[1].Reg()
   706  		r2 := v.Args[2].Reg()
   707  		out := v.Reg0()
   708  		p := s.Prog(ld)
   709  		p.From.Type = obj.TYPE_MEM
   710  		p.From.Reg = r0
   711  		p.To.Type = obj.TYPE_REG
   712  		p.To.Reg = arm64.REGTMP
   713  		p1 := s.Prog(cmp)
   714  		p1.From.Type = obj.TYPE_REG
   715  		p1.From.Reg = r1
   716  		p1.Reg = arm64.REGTMP
   717  		p2 := s.Prog(arm64.ABNE)
   718  		p2.To.Type = obj.TYPE_BRANCH
   719  		p3 := s.Prog(st)
   720  		p3.From.Type = obj.TYPE_REG
   721  		p3.From.Reg = r2
   722  		p3.To.Type = obj.TYPE_MEM
   723  		p3.To.Reg = r0
   724  		p3.RegTo2 = arm64.REGTMP
   725  		p4 := s.Prog(arm64.ACBNZ)
   726  		p4.From.Type = obj.TYPE_REG
   727  		p4.From.Reg = arm64.REGTMP
   728  		p4.To.Type = obj.TYPE_BRANCH
   729  		p4.To.SetTarget(p)
   730  		p5 := s.Prog(arm64.ACSET)
   731  		p5.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
   732  		p5.From.Reg = arm64.COND_EQ
   733  		p5.To.Type = obj.TYPE_REG
   734  		p5.To.Reg = out
   735  		p2.To.SetTarget(p5)
   736  	case ssa.OpARM64LoweredAtomicCas64Variant,
   737  		ssa.OpARM64LoweredAtomicCas32Variant:
   738  		// Rarg0: ptr
   739  		// Rarg1: old
   740  		// Rarg2: new
   741  		// MOV  	Rarg1, Rtmp
   742  		// CASAL	Rtmp, (Rarg0), Rarg2
   743  		// CMP  	Rarg1, Rtmp
   744  		// CSET 	EQ, Rout
   745  		cas := arm64.ACASALD
   746  		cmp := arm64.ACMP
   747  		mov := arm64.AMOVD
   748  		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
   749  			cas = arm64.ACASALW
   750  			cmp = arm64.ACMPW
   751  			mov = arm64.AMOVW
   752  		}
   753  		r0 := v.Args[0].Reg()
   754  		r1 := v.Args[1].Reg()
   755  		r2 := v.Args[2].Reg()
   756  		out := v.Reg0()
   757  
   758  		// MOV  	Rarg1, Rtmp
   759  		p := s.Prog(mov)
   760  		p.From.Type = obj.TYPE_REG
   761  		p.From.Reg = r1
   762  		p.To.Type = obj.TYPE_REG
   763  		p.To.Reg = arm64.REGTMP
   764  
   765  		// CASAL	Rtmp, (Rarg0), Rarg2
   766  		p1 := s.Prog(cas)
   767  		p1.From.Type = obj.TYPE_REG
   768  		p1.From.Reg = arm64.REGTMP
   769  		p1.To.Type = obj.TYPE_MEM
   770  		p1.To.Reg = r0
   771  		p1.RegTo2 = r2
   772  
   773  		// CMP  	Rarg1, Rtmp
   774  		p2 := s.Prog(cmp)
   775  		p2.From.Type = obj.TYPE_REG
   776  		p2.From.Reg = r1
   777  		p2.Reg = arm64.REGTMP
   778  
   779  		// CSET 	EQ, Rout
   780  		p3 := s.Prog(arm64.ACSET)
   781  		p3.From.Type = obj.TYPE_REG
   782  		p3.From.Reg = arm64.COND_EQ
   783  		p3.To.Type = obj.TYPE_REG
   784  		p3.To.Reg = out
   785  
   786  	case ssa.OpARM64LoweredAtomicAnd8,
   787  		ssa.OpARM64LoweredAtomicAnd32,
   788  		ssa.OpARM64LoweredAtomicOr8,
   789  		ssa.OpARM64LoweredAtomicOr32:
   790  		// LDAXRB/LDAXRW (Rarg0), Rout
   791  		// AND/OR	Rarg1, Rout
    792  		// STLXRB/STLXRW Rout, (Rarg0), Rtmp
   793  		// CBNZ		Rtmp, -3(PC)
   794  		ld := arm64.ALDAXRB
   795  		st := arm64.ASTLXRB
   796  		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
   797  			ld = arm64.ALDAXRW
   798  			st = arm64.ASTLXRW
   799  		}
   800  		r0 := v.Args[0].Reg()
   801  		r1 := v.Args[1].Reg()
   802  		out := v.Reg0()
   803  		p := s.Prog(ld)
   804  		p.From.Type = obj.TYPE_MEM
   805  		p.From.Reg = r0
   806  		p.To.Type = obj.TYPE_REG
   807  		p.To.Reg = out
   808  		p1 := s.Prog(v.Op.Asm())
   809  		p1.From.Type = obj.TYPE_REG
   810  		p1.From.Reg = r1
   811  		p1.To.Type = obj.TYPE_REG
   812  		p1.To.Reg = out
   813  		p2 := s.Prog(st)
   814  		p2.From.Type = obj.TYPE_REG
   815  		p2.From.Reg = out
   816  		p2.To.Type = obj.TYPE_MEM
   817  		p2.To.Reg = r0
   818  		p2.RegTo2 = arm64.REGTMP
   819  		p3 := s.Prog(arm64.ACBNZ)
   820  		p3.From.Type = obj.TYPE_REG
   821  		p3.From.Reg = arm64.REGTMP
   822  		p3.To.Type = obj.TYPE_BRANCH
   823  		p3.To.SetTarget(p)
   824  	case ssa.OpARM64LoweredAtomicAnd8Variant,
   825  		ssa.OpARM64LoweredAtomicAnd32Variant:
   826  		atomic_clear := arm64.ALDCLRALW
   827  		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
   828  			atomic_clear = arm64.ALDCLRALB
   829  		}
   830  		r0 := v.Args[0].Reg()
   831  		r1 := v.Args[1].Reg()
   832  		out := v.Reg0()
   833  
    834  		// MVN       Rarg1, Rtmp
   835  		p := s.Prog(arm64.AMVN)
   836  		p.From.Type = obj.TYPE_REG
   837  		p.From.Reg = r1
   838  		p.To.Type = obj.TYPE_REG
   839  		p.To.Reg = arm64.REGTMP
   840  
    841  		// LDCLRALW  Rtmp, (Rarg0), Rout
   842  		p1 := s.Prog(atomic_clear)
   843  		p1.From.Type = obj.TYPE_REG
   844  		p1.From.Reg = arm64.REGTMP
   845  		p1.To.Type = obj.TYPE_MEM
   846  		p1.To.Reg = r0
   847  		p1.RegTo2 = out
   848  
   849  		// AND       Rarg1, Rout
   850  		p2 := s.Prog(arm64.AAND)
   851  		p2.From.Type = obj.TYPE_REG
   852  		p2.From.Reg = r1
   853  		p2.To.Type = obj.TYPE_REG
   854  		p2.To.Reg = out
   855  
   856  	case ssa.OpARM64LoweredAtomicOr8Variant,
   857  		ssa.OpARM64LoweredAtomicOr32Variant:
   858  		atomic_or := arm64.ALDORALW
   859  		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
   860  			atomic_or = arm64.ALDORALB
   861  		}
   862  		r0 := v.Args[0].Reg()
   863  		r1 := v.Args[1].Reg()
   864  		out := v.Reg0()
   865  
   866  		// LDORALW  Rarg1, (Rarg0), Rout
   867  		p := s.Prog(atomic_or)
   868  		p.From.Type = obj.TYPE_REG
   869  		p.From.Reg = r1
   870  		p.To.Type = obj.TYPE_MEM
   871  		p.To.Reg = r0
   872  		p.RegTo2 = out
   873  
   874  		// ORR       Rarg1, Rout
   875  		p2 := s.Prog(arm64.AORR)
   876  		p2.From.Type = obj.TYPE_REG
   877  		p2.From.Reg = r1
   878  		p2.To.Type = obj.TYPE_REG
   879  		p2.To.Reg = out
   880  
   881  	case ssa.OpARM64MOVBreg,
   882  		ssa.OpARM64MOVBUreg,
   883  		ssa.OpARM64MOVHreg,
   884  		ssa.OpARM64MOVHUreg,
   885  		ssa.OpARM64MOVWreg,
   886  		ssa.OpARM64MOVWUreg:
   887  		a := v.Args[0]
   888  		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
   889  			a = a.Args[0]
   890  		}
   891  		if a.Op == ssa.OpLoadReg {
   892  			t := a.Type
   893  			switch {
   894  			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
   895  				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   896  				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
   897  				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   898  				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
   899  				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   900  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   901  				if v.Reg() == v.Args[0].Reg() {
   902  					return
   903  				}
   904  				p := s.Prog(arm64.AMOVD)
   905  				p.From.Type = obj.TYPE_REG
   906  				p.From.Reg = v.Args[0].Reg()
   907  				p.To.Type = obj.TYPE_REG
   908  				p.To.Reg = v.Reg()
   909  				return
   910  			default:
   911  			}
   912  		}
   913  		fallthrough
   914  	case ssa.OpARM64MVN,
   915  		ssa.OpARM64NEG,
   916  		ssa.OpARM64FABSD,
   917  		ssa.OpARM64FMOVDfpgp,
   918  		ssa.OpARM64FMOVDgpfp,
   919  		ssa.OpARM64FMOVSfpgp,
   920  		ssa.OpARM64FMOVSgpfp,
   921  		ssa.OpARM64FNEGS,
   922  		ssa.OpARM64FNEGD,
   923  		ssa.OpARM64FSQRTS,
   924  		ssa.OpARM64FSQRTD,
   925  		ssa.OpARM64FCVTZSSW,
   926  		ssa.OpARM64FCVTZSDW,
   927  		ssa.OpARM64FCVTZUSW,
   928  		ssa.OpARM64FCVTZUDW,
   929  		ssa.OpARM64FCVTZSS,
   930  		ssa.OpARM64FCVTZSD,
   931  		ssa.OpARM64FCVTZUS,
   932  		ssa.OpARM64FCVTZUD,
   933  		ssa.OpARM64SCVTFWS,
   934  		ssa.OpARM64SCVTFWD,
   935  		ssa.OpARM64SCVTFS,
   936  		ssa.OpARM64SCVTFD,
   937  		ssa.OpARM64UCVTFWS,
   938  		ssa.OpARM64UCVTFWD,
   939  		ssa.OpARM64UCVTFS,
   940  		ssa.OpARM64UCVTFD,
   941  		ssa.OpARM64FCVTSD,
   942  		ssa.OpARM64FCVTDS,
   943  		ssa.OpARM64REV,
   944  		ssa.OpARM64REVW,
   945  		ssa.OpARM64REV16,
   946  		ssa.OpARM64REV16W,
   947  		ssa.OpARM64RBIT,
   948  		ssa.OpARM64RBITW,
   949  		ssa.OpARM64CLZ,
   950  		ssa.OpARM64CLZW,
   951  		ssa.OpARM64FRINTAD,
   952  		ssa.OpARM64FRINTMD,
   953  		ssa.OpARM64FRINTND,
   954  		ssa.OpARM64FRINTPD,
   955  		ssa.OpARM64FRINTZD:
   956  		p := s.Prog(v.Op.Asm())
   957  		p.From.Type = obj.TYPE_REG
   958  		p.From.Reg = v.Args[0].Reg()
   959  		p.To.Type = obj.TYPE_REG
   960  		p.To.Reg = v.Reg()
   961  	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
   962  		// input is already rounded
   963  	case ssa.OpARM64VCNT:
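         		// VCNT is a SIMD op: the F-register operands are rewritten as V
         		// registers with an 8B arrangement (REG_ARNG plus the arrangement
         		// code packed at bit 5 above the register number).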
   964  		p := s.Prog(v.Op.Asm())
   965  		p.From.Type = obj.TYPE_REG
   966  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   967  		p.To.Type = obj.TYPE_REG
   968  		p.To.Reg = (v.Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   969  	case ssa.OpARM64VUADDLV:
   970  		p := s.Prog(v.Op.Asm())
   971  		p.From.Type = obj.TYPE_REG
   972  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   973  		p.To.Type = obj.TYPE_REG
   974  		p.To.Reg = v.Reg() - arm64.REG_F0 + arm64.REG_V0
   975  	case ssa.OpARM64CSEL, ssa.OpARM64CSEL0:
   976  		r1 := int16(arm64.REGZERO)
   977  		if v.Op != ssa.OpARM64CSEL0 {
   978  			r1 = v.Args[1].Reg()
   979  		}
   980  		p := s.Prog(v.Op.Asm())
   981  		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
   982  		p.From.Reg = condBits[ssa.Op(v.AuxInt)]
   983  		p.Reg = v.Args[0].Reg()
   984  		p.SetFrom3Reg(r1)
   985  		p.To.Type = obj.TYPE_REG
   986  		p.To.Reg = v.Reg()
   987  	case ssa.OpARM64CSINC, ssa.OpARM64CSINV, ssa.OpARM64CSNEG:
   988  		p := s.Prog(v.Op.Asm())
   989  		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
   990  		p.From.Reg = condBits[ssa.Op(v.AuxInt)]
   991  		p.Reg = v.Args[0].Reg()
   992  		p.SetFrom3Reg(v.Args[1].Reg())
   993  		p.To.Type = obj.TYPE_REG
   994  		p.To.Reg = v.Reg()
   995  	case ssa.OpARM64CSETM:
   996  		p := s.Prog(arm64.ACSETM)
   997  		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
   998  		p.From.Reg = condBits[ssa.Op(v.AuxInt)]
   999  		p.To.Type = obj.TYPE_REG
  1000  		p.To.Reg = v.Reg()
  1001  	case ssa.OpARM64DUFFZERO:
  1002  		// runtime.duffzero expects start address in R20
  1003  		p := s.Prog(obj.ADUFFZERO)
  1004  		p.To.Type = obj.TYPE_MEM
  1005  		p.To.Name = obj.NAME_EXTERN
  1006  		p.To.Sym = ir.Syms.Duffzero
  1007  		p.To.Offset = v.AuxInt
  1008  	case ssa.OpARM64LoweredZero:
  1009  		// STP.P	(ZR,ZR), 16(R16)
  1010  		// CMP	Rarg1, R16
  1011  		// BLE	-2(PC)
  1012  		// arg1 is the address of the last 16-byte unit to zero
  1013  		p := s.Prog(arm64.ASTP)
  1014  		p.Scond = arm64.C_XPOST
  1015  		p.From.Type = obj.TYPE_REGREG
  1016  		p.From.Reg = arm64.REGZERO
  1017  		p.From.Offset = int64(arm64.REGZERO)
  1018  		p.To.Type = obj.TYPE_MEM
  1019  		p.To.Reg = arm64.REG_R16
  1020  		p.To.Offset = 16
  1021  		p2 := s.Prog(arm64.ACMP)
  1022  		p2.From.Type = obj.TYPE_REG
  1023  		p2.From.Reg = v.Args[1].Reg()
  1024  		p2.Reg = arm64.REG_R16
  1025  		p3 := s.Prog(arm64.ABLE)
  1026  		p3.To.Type = obj.TYPE_BRANCH
  1027  		p3.To.SetTarget(p)
  1028  	case ssa.OpARM64DUFFCOPY:
  1029  		p := s.Prog(obj.ADUFFCOPY)
  1030  		p.To.Type = obj.TYPE_MEM
  1031  		p.To.Name = obj.NAME_EXTERN
  1032  		p.To.Sym = ir.Syms.Duffcopy
  1033  		p.To.Offset = v.AuxInt
  1034  	case ssa.OpARM64LoweredMove:
  1035  		// MOVD.P	8(R16), Rtmp
  1036  		// MOVD.P	Rtmp, 8(R17)
  1037  		// CMP	Rarg2, R16
  1038  		// BLE	-3(PC)
  1039  		// arg2 is the address of the last element of src
  1040  		p := s.Prog(arm64.AMOVD)
  1041  		p.Scond = arm64.C_XPOST
  1042  		p.From.Type = obj.TYPE_MEM
  1043  		p.From.Reg = arm64.REG_R16
  1044  		p.From.Offset = 8
  1045  		p.To.Type = obj.TYPE_REG
  1046  		p.To.Reg = arm64.REGTMP
  1047  		p2 := s.Prog(arm64.AMOVD)
  1048  		p2.Scond = arm64.C_XPOST
  1049  		p2.From.Type = obj.TYPE_REG
  1050  		p2.From.Reg = arm64.REGTMP
  1051  		p2.To.Type = obj.TYPE_MEM
  1052  		p2.To.Reg = arm64.REG_R17
  1053  		p2.To.Offset = 8
  1054  		p3 := s.Prog(arm64.ACMP)
  1055  		p3.From.Type = obj.TYPE_REG
  1056  		p3.From.Reg = v.Args[2].Reg()
  1057  		p3.Reg = arm64.REG_R16
  1058  		p4 := s.Prog(arm64.ABLE)
  1059  		p4.To.Type = obj.TYPE_BRANCH
  1060  		p4.To.SetTarget(p)
  1061  	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
  1062  		s.Call(v)
  1063  	case ssa.OpARM64CALLtail:
  1064  		s.TailCall(v)
  1065  	case ssa.OpARM64LoweredWB:
  1066  		p := s.Prog(obj.ACALL)
  1067  		p.To.Type = obj.TYPE_MEM
  1068  		p.To.Name = obj.NAME_EXTERN
  1069  		p.To.Sym = v.Aux.(*obj.LSym)
  1070  	case ssa.OpARM64LoweredPanicBoundsA, ssa.OpARM64LoweredPanicBoundsB, ssa.OpARM64LoweredPanicBoundsC:
  1071  		p := s.Prog(obj.ACALL)
  1072  		p.To.Type = obj.TYPE_MEM
  1073  		p.To.Name = obj.NAME_EXTERN
  1074  		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
  1075  		s.UseArgs(16) // space used in callee args area by assembly stubs
  1076  	case ssa.OpARM64LoweredNilCheck:
  1077  		// Issue a load which will fault if arg is nil.
  1078  		p := s.Prog(arm64.AMOVB)
  1079  		p.From.Type = obj.TYPE_MEM
  1080  		p.From.Reg = v.Args[0].Reg()
  1081  		ssagen.AddAux(&p.From, v)
  1082  		p.To.Type = obj.TYPE_REG
  1083  		p.To.Reg = arm64.REGTMP
  1084  		if logopt.Enabled() {
  1085  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1086  		}
  1087  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Line==1 in generated wrappers
  1088  			base.WarnfAt(v.Pos, "generated nil check")
  1089  		}
  1090  	case ssa.OpARM64Equal,
  1091  		ssa.OpARM64NotEqual,
  1092  		ssa.OpARM64LessThan,
  1093  		ssa.OpARM64LessEqual,
  1094  		ssa.OpARM64GreaterThan,
  1095  		ssa.OpARM64GreaterEqual,
  1096  		ssa.OpARM64LessThanU,
  1097  		ssa.OpARM64LessEqualU,
  1098  		ssa.OpARM64GreaterThanU,
  1099  		ssa.OpARM64GreaterEqualU,
  1100  		ssa.OpARM64LessThanF,
  1101  		ssa.OpARM64LessEqualF,
  1102  		ssa.OpARM64GreaterThanF,
  1103  		ssa.OpARM64GreaterEqualF,
  1104  		ssa.OpARM64NotLessThanF,
  1105  		ssa.OpARM64NotLessEqualF,
  1106  		ssa.OpARM64NotGreaterThanF,
  1107  		ssa.OpARM64NotGreaterEqualF:
  1108  		// generate boolean values using CSET
  1109  		p := s.Prog(arm64.ACSET)
  1110  		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
  1111  		p.From.Reg = condBits[v.Op]
  1112  		p.To.Type = obj.TYPE_REG
  1113  		p.To.Reg = v.Reg()
  1114  	case ssa.OpARM64PRFM:
  1115  		p := s.Prog(v.Op.Asm())
  1116  		p.From.Type = obj.TYPE_MEM
  1117  		p.From.Reg = v.Args[0].Reg()
  1118  		p.To.Type = obj.TYPE_CONST
  1119  		p.To.Offset = v.AuxInt
  1120  	case ssa.OpARM64LoweredGetClosurePtr:
  1121  		// Closure pointer is R26 (arm64.REGCTXT).
  1122  		ssagen.CheckLoweredGetClosurePtr(v)
  1123  	case ssa.OpARM64LoweredGetCallerSP:
  1124  		// caller's SP is FixedFrameSize below the address of the first arg
  1125  		p := s.Prog(arm64.AMOVD)
  1126  		p.From.Type = obj.TYPE_ADDR
  1127  		p.From.Offset = -base.Ctxt.FixedFrameSize()
  1128  		p.From.Name = obj.NAME_PARAM
  1129  		p.To.Type = obj.TYPE_REG
  1130  		p.To.Reg = v.Reg()
  1131  	case ssa.OpARM64LoweredGetCallerPC:
  1132  		p := s.Prog(obj.AGETCALLERPC)
  1133  		p.To.Type = obj.TYPE_REG
  1134  		p.To.Reg = v.Reg()
  1135  	case ssa.OpARM64DMB:
  1136  		p := s.Prog(v.Op.Asm())
  1137  		p.From.Type = obj.TYPE_CONST
  1138  		p.From.Offset = v.AuxInt
  1139  	case ssa.OpARM64FlagConstant:
  1140  		v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
  1141  	case ssa.OpARM64InvertFlags:
  1142  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1143  	case ssa.OpClobber:
  1144  		// MOVW	$0xdeaddead, REGTMP
  1145  		// MOVW	REGTMP, (slot)
  1146  		// MOVW	REGTMP, 4(slot)
  1147  		p := s.Prog(arm64.AMOVW)
  1148  		p.From.Type = obj.TYPE_CONST
  1149  		p.From.Offset = 0xdeaddead
  1150  		p.To.Type = obj.TYPE_REG
  1151  		p.To.Reg = arm64.REGTMP
  1152  		p = s.Prog(arm64.AMOVW)
  1153  		p.From.Type = obj.TYPE_REG
  1154  		p.From.Reg = arm64.REGTMP
  1155  		p.To.Type = obj.TYPE_MEM
  1156  		p.To.Reg = arm64.REGSP
  1157  		ssagen.AddAux(&p.To, v)
  1158  		p = s.Prog(arm64.AMOVW)
  1159  		p.From.Type = obj.TYPE_REG
  1160  		p.From.Reg = arm64.REGTMP
  1161  		p.To.Type = obj.TYPE_MEM
  1162  		p.To.Reg = arm64.REGSP
  1163  		ssagen.AddAux2(&p.To, v, v.AuxInt+4)
  1164  	case ssa.OpClobberReg:
  1165  		x := uint64(0xdeaddeaddeaddead)
  1166  		p := s.Prog(arm64.AMOVD)
  1167  		p.From.Type = obj.TYPE_CONST
  1168  		p.From.Offset = int64(x)
  1169  		p.To.Type = obj.TYPE_REG
  1170  		p.To.Reg = v.Reg()
  1171  	default:
  1172  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1173  	}
  1174  }
  1175  
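         // condBits maps an SSA comparison op to the arm64 condition-code constant
         // that the assembler expects in the Reg field of CSET, CSEL and friends.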
  1176  var condBits = map[ssa.Op]int16{
  1177  	ssa.OpARM64Equal:         arm64.COND_EQ,
  1178  	ssa.OpARM64NotEqual:      arm64.COND_NE,
  1179  	ssa.OpARM64LessThan:      arm64.COND_LT,
  1180  	ssa.OpARM64LessThanU:     arm64.COND_LO,
  1181  	ssa.OpARM64LessEqual:     arm64.COND_LE,
  1182  	ssa.OpARM64LessEqualU:    arm64.COND_LS,
  1183  	ssa.OpARM64GreaterThan:   arm64.COND_GT,
  1184  	ssa.OpARM64GreaterThanU:  arm64.COND_HI,
  1185  	ssa.OpARM64GreaterEqual:  arm64.COND_GE,
  1186  	ssa.OpARM64GreaterEqualU: arm64.COND_HS,
  1187  	ssa.OpARM64LessThanF:     arm64.COND_MI, // Less than
  1188  	ssa.OpARM64LessEqualF:    arm64.COND_LS, // Less than or equal to
  1189  	ssa.OpARM64GreaterThanF:  arm64.COND_GT, // Greater than
  1190  	ssa.OpARM64GreaterEqualF: arm64.COND_GE, // Greater than or equal to
  1191  
   1192  	// The following condition codes include the unordered case, to handle comparisons involving NaN.
  1193  	ssa.OpARM64NotLessThanF:     arm64.COND_PL, // Greater than, equal to, or unordered
  1194  	ssa.OpARM64NotLessEqualF:    arm64.COND_HI, // Greater than or unordered
  1195  	ssa.OpARM64NotGreaterThanF:  arm64.COND_LE, // Less than, equal to or unordered
  1196  	ssa.OpARM64NotGreaterEqualF: arm64.COND_LT, // Less than or unordered
  1197  }
  1198  
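         // blockJump maps a conditional block kind to its branch instructions: asm
         // branches to the first successor, and invasm (the inverted condition)
         // branches to the second successor when the first is the fallthrough block.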
  1199  var blockJump = map[ssa.BlockKind]struct {
  1200  	asm, invasm obj.As
  1201  }{
  1202  	ssa.BlockARM64EQ:     {arm64.ABEQ, arm64.ABNE},
  1203  	ssa.BlockARM64NE:     {arm64.ABNE, arm64.ABEQ},
  1204  	ssa.BlockARM64LT:     {arm64.ABLT, arm64.ABGE},
  1205  	ssa.BlockARM64GE:     {arm64.ABGE, arm64.ABLT},
  1206  	ssa.BlockARM64LE:     {arm64.ABLE, arm64.ABGT},
  1207  	ssa.BlockARM64GT:     {arm64.ABGT, arm64.ABLE},
  1208  	ssa.BlockARM64ULT:    {arm64.ABLO, arm64.ABHS},
  1209  	ssa.BlockARM64UGE:    {arm64.ABHS, arm64.ABLO},
  1210  	ssa.BlockARM64UGT:    {arm64.ABHI, arm64.ABLS},
  1211  	ssa.BlockARM64ULE:    {arm64.ABLS, arm64.ABHI},
  1212  	ssa.BlockARM64Z:      {arm64.ACBZ, arm64.ACBNZ},
  1213  	ssa.BlockARM64NZ:     {arm64.ACBNZ, arm64.ACBZ},
  1214  	ssa.BlockARM64ZW:     {arm64.ACBZW, arm64.ACBNZW},
  1215  	ssa.BlockARM64NZW:    {arm64.ACBNZW, arm64.ACBZW},
  1216  	ssa.BlockARM64TBZ:    {arm64.ATBZ, arm64.ATBNZ},
  1217  	ssa.BlockARM64TBNZ:   {arm64.ATBNZ, arm64.ATBZ},
  1218  	ssa.BlockARM64FLT:    {arm64.ABMI, arm64.ABPL},
  1219  	ssa.BlockARM64FGE:    {arm64.ABGE, arm64.ABLT},
  1220  	ssa.BlockARM64FLE:    {arm64.ABLS, arm64.ABHI},
  1221  	ssa.BlockARM64FGT:    {arm64.ABGT, arm64.ABLE},
  1222  	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
  1223  	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
  1224  }
  1225  
   1226  // leJumps models a 'LEnoov' ('<=' without overflow checking) branch with two conditional jumps.
  1227  var leJumps = [2][2]ssagen.IndexJump{
  1228  	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
  1229  	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
  1230  }
  1231  
   1232  // gtJumps models a 'GTnoov' ('>' without overflow checking) branch with two conditional jumps.
  1233  var gtJumps = [2][2]ssagen.IndexJump{
  1234  	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
  1235  	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
  1236  }
  1237  
  1238  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1239  	switch b.Kind {
  1240  	case ssa.BlockPlain:
  1241  		if b.Succs[0].Block() != next {
  1242  			p := s.Prog(obj.AJMP)
  1243  			p.To.Type = obj.TYPE_BRANCH
  1244  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1245  		}
  1246  
  1247  	case ssa.BlockDefer:
  1248  		// defer returns in R0:
  1249  		// 0 if we should continue executing
  1250  		// 1 if we should jump to deferreturn call
  1251  		p := s.Prog(arm64.ACMP)
  1252  		p.From.Type = obj.TYPE_CONST
  1253  		p.From.Offset = 0
  1254  		p.Reg = arm64.REG_R0
  1255  		p = s.Prog(arm64.ABNE)
  1256  		p.To.Type = obj.TYPE_BRANCH
  1257  		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
  1258  		if b.Succs[0].Block() != next {
  1259  			p := s.Prog(obj.AJMP)
  1260  			p.To.Type = obj.TYPE_BRANCH
  1261  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1262  		}
  1263  
  1264  	case ssa.BlockExit, ssa.BlockRetJmp:
  1265  
  1266  	case ssa.BlockRet:
  1267  		s.Prog(obj.ARET)
  1268  
  1269  	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
  1270  		ssa.BlockARM64LT, ssa.BlockARM64GE,
  1271  		ssa.BlockARM64LE, ssa.BlockARM64GT,
  1272  		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
  1273  		ssa.BlockARM64ULE, ssa.BlockARM64UGE,
  1274  		ssa.BlockARM64Z, ssa.BlockARM64NZ,
  1275  		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
  1276  		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
  1277  		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
  1278  		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
  1279  		jmp := blockJump[b.Kind]
  1280  		var p *obj.Prog
  1281  		switch next {
  1282  		case b.Succs[0].Block():
  1283  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1284  		case b.Succs[1].Block():
  1285  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1286  		default:
  1287  			if b.Likely != ssa.BranchUnlikely {
  1288  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1289  				s.Br(obj.AJMP, b.Succs[1].Block())
  1290  			} else {
  1291  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1292  				s.Br(obj.AJMP, b.Succs[0].Block())
  1293  			}
  1294  		}
  1295  		if !b.Controls[0].Type.IsFlags() {
  1296  			p.From.Type = obj.TYPE_REG
  1297  			p.From.Reg = b.Controls[0].Reg()
  1298  		}
  1299  	case ssa.BlockARM64TBZ, ssa.BlockARM64TBNZ:
  1300  		jmp := blockJump[b.Kind]
  1301  		var p *obj.Prog
  1302  		switch next {
  1303  		case b.Succs[0].Block():
  1304  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1305  		case b.Succs[1].Block():
  1306  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1307  		default:
  1308  			if b.Likely != ssa.BranchUnlikely {
  1309  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1310  				s.Br(obj.AJMP, b.Succs[1].Block())
  1311  			} else {
  1312  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1313  				s.Br(obj.AJMP, b.Succs[0].Block())
  1314  			}
  1315  		}
  1316  		p.From.Offset = b.AuxInt
  1317  		p.From.Type = obj.TYPE_CONST
  1318  		p.Reg = b.Controls[0].Reg()
  1319  
  1320  	case ssa.BlockARM64LEnoov:
  1321  		s.CombJump(b, next, &leJumps)
  1322  	case ssa.BlockARM64GTnoov:
  1323  		s.CombJump(b, next, &gtJumps)
  1324  	default:
  1325  		b.Fatalf("branch not implemented: %s", b.LongString())
  1326  	}
  1327  }
  1328  
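         // loadRegResult generates a load of the result value of type t from its
         // stack slot (named slot n at offset off) into register reg.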
  1329  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1330  	p := s.Prog(loadByType(t))
  1331  	p.From.Type = obj.TYPE_MEM
  1332  	p.From.Name = obj.NAME_AUTO
  1333  	p.From.Sym = n.Linksym()
  1334  	p.From.Offset = n.FrameOffset() + off
  1335  	p.To.Type = obj.TYPE_REG
  1336  	p.To.Reg = reg
  1337  	return p
  1338  }
  1339  
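         // spillArgReg generates a store of argument register reg into the parameter
         // stack slot for n at offset off, marked as not being a statement.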
  1340  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1341  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1342  	p.To.Name = obj.NAME_PARAM
  1343  	p.To.Sym = n.Linksym()
  1344  	p.Pos = p.Pos.WithNotStmt()
  1345  	return p
  1346  }
  1347  
