Text file src/cmd/compile/internal/ssa/gen/ARM64.rules

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
// lowering of basic integer and floating-point arithmetic
(Add(Ptr|64|32|16|8) ...) => (ADD ...)
(Add(32F|64F) ...) => (FADD(S|D) ...)

(Sub(Ptr|64|32|16|8) ...) => (SUB ...)
(Sub(32F|64F) ...) => (FSUB(S|D) ...)

(Mul64 ...) => (MUL ...)
(Mul(32|16|8) ...) => (MULW ...)
(Mul(32F|64F) ...) => (FMUL(S|D) ...)

// high-word multiply: 32-bit operands use a widening 64-bit multiply
// (MULL/UMULL) and take the upper 32 bits via a shift by 32.
(Hmul64 ...) => (MULH ...)
(Hmul64u ...) => (UMULH ...)
(Hmul32 x y) => (SRAconst (MULL <typ.Int64> x y) [32])
(Hmul32u x y) => (SRAconst (UMULL <typ.UInt64> x y) [32])
(Mul64uhilo ...) => (LoweredMuluhilo ...)

// division: 8/16-bit operands are sign/zero-extended so the 32-bit
// divide instructions can be used.
// NOTE(review): the [false] aux on the signed Div rules presumably means
// "no divide fix-up needed" -- confirm against the generic op definition.
(Div64 [false] x y) => (DIV x y)
(Div64u ...) => (UDIV ...)
(Div32 [false] x y) => (DIVW x y)
(Div32u ...) => (UDIVW ...)
(Div16 [false] x y) => (DIVW (SignExt16to32 x) (SignExt16to32 y))
(Div16u x y) => (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y))
(Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y))
(Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y))
(Div32F ...) => (FDIVS ...)
(Div64F ...) => (FDIVD ...)

// modulo: same widening treatment as division for 8/16-bit operands
(Mod64 x y) => (MOD x y)
(Mod64u ...) => (UMOD ...)
(Mod32 x y) => (MODW x y)
(Mod32u ...) => (UMODW ...)
(Mod16 x y) => (MODW (SignExt16to32 x) (SignExt16to32 y))
(Mod16u x y) => (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y))
(Mod8 x y) => (MODW (SignExt8to32 x) (SignExt8to32 y))
(Mod8u x y) => (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y))

// (x + y) / 2 with x>=y    =>    (x - y) / 2 + y
// (rewritten this way so the addition cannot overflow)
(Avg64u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y)

(And(64|32|16|8) ...) => (AND ...)
(Or(64|32|16|8) ...) => (OR ...)
(Xor(64|32|16|8) ...) => (XOR ...)
    47  
// unary ops
(Neg(64|32|16|8) ...) => (NEG ...)
(Neg(32F|64F) ...) => (FNEG(S|D) ...)
(Com(64|32|16|8) ...) => (MVN ...)

// math package intrinsics
(Abs ...) => (FABSD ...)
(Sqrt ...) => (FSQRTD ...)
(Ceil ...) => (FRINTPD ...)
(Floor ...) => (FRINTMD ...)
(Round ...) => (FRINTAD ...)
(RoundToEven ...) => (FRINTND ...)
(Trunc ...) => (FRINTZD ...)
// FMA computes x*y + z; FMADDD takes the addend as its first argument.
(FMA x y z) => (FMADDD z x y)

(Sqrt32 ...) => (FSQRTS ...)

// lowering rotates
// 8/16-bit rotates have no hardware support: decompose a rotate by a
// constant into a left-shift / right-shift pair.
(RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
(RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
// ROR/RORW rotate right, so rotate left by y = rotate right by -y.
(RotateLeft32 x y) => (RORW x (NEG <y.Type> y))
(RotateLeft64 x y) => (ROR x (NEG <y.Type> y))

(Ctz(64|32|16|8)NonZero ...) => (Ctz(64|32|32|32) ...)

// count trailing zeros = count leading zeros of the bit-reversed value
(Ctz64 <t> x) => (CLZ (RBIT <t> x))
(Ctz32 <t> x) => (CLZW (RBITW <t> x))
// for 8/16-bit inputs, OR in a bit just above the operand's width so an
// all-zero input yields the correct count (16 or 8) instead of 32.
(Ctz16 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
(Ctz8 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))

// popcount: move to the FP/SIMD registers, count bits per byte with
// VCNT, then sum the bytes with VUADDLV.
(PopCount64 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x))))
(PopCount32 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x)))))
(PopCount16 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt16to64 x)))))

// Load args directly into the register class where it will be used.
(FMOVDgpfp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym})
(FMOVDfpgp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym})

// Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set.
(MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) => (FMOVDstore [off] {sym} ptr val mem)
(FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) => (MOVDstore [off] {sym} ptr val mem)
(MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) => (FMOVSstore [off] {sym} ptr val mem)
(FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem)

// float <=> int register moves, with no conversion.
// These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}.
// A load of a just-stored value of the other register class becomes a
// direct cross-register-file move.
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) => (FMOVDfpgp val)
(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (FMOVDgpfp val)
(MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) => (FMOVSfpgp val)
(FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (FMOVSgpfp val)

// bit length = width - leading zeros
(BitLen64 x) => (SUB (MOVDconst [64]) (CLZ <typ.Int> x))
(BitLen32 x) => (SUB (MOVDconst [32]) (CLZW <typ.Int> x))

(Bswap64 ...) => (REV ...)
(Bswap32 ...) => (REVW ...)

// bit reversal; for 8/16-bit values reverse all 64 bits and shift the
// result back down into the low bits.
(BitRev64 ...) => (RBIT ...)
(BitRev32 ...) => (RBITW ...)
(BitRev16 x) => (SRLconst [48] (RBIT <typ.UInt64> x))
(BitRev8 x) => (SRLconst [56] (RBIT <typ.UInt64> x))
   109  
// In fact, UMOD will be translated into a UREM instruction, and UREM is
// normally expanded into UDIV and MSUB instructions. But if there is already
// an identical UDIV instruction just before or after UREM (a case like
// quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant.
// The purpose of this rule is to expand UMOD eagerly so the extra UDIV
// instruction can be removed in the CSE pass.
(UMOD <typ.UInt64> x y) => (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y))
(UMODW <typ.UInt32> x y) => (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y))

// 64-bit addition with carry.
// ADDSconstflags [-1] c sets the carry flag iff c is nonzero; ADCSflags
// then adds x, y and that carry, producing the new carry in flags.
(Select0 (Add64carry x y c)) => (Select0 <typ.UInt64> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c))))
(Select1 (Add64carry x y c)) => (ADCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c)))))

// 64-bit subtraction with borrowing.
(Select0 (Sub64borrow x y bo)) => (Select0 <typ.UInt64> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))
(Select1 (Sub64borrow x y bo)) => (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))))

// boolean ops -- booleans are represented with 0=false, 1=true
(AndB ...) => (AND ...)
(OrB ...) => (OR ...)
// EqB/Not: XOR with 1 flips the boolean's low bit.
(EqB x y) => (XOR (MOVDconst [1]) (XOR <typ.Bool> x y))
(NeqB ...) => (XOR ...)
(Not x) => (XOR (MOVDconst [1]) x)
   131  
// shifts
// The hardware instruction uses only the low 6 bits of the shift amount;
// we compare against 64 to ensure Go semantics for large shifts.
// Rules about rotates with non-const shift are based on the following rules;
// if the following rules change, please also modify the rules based on them.

// left shift: CSEL selects the hardware shift when y < 64 (unsigned),
// and 0 otherwise, matching Go's shift semantics.
(Lsh64x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
(Lsh64x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Lsh64x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
(Lsh64x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))

(Lsh32x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
(Lsh32x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Lsh32x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
(Lsh32x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))

(Lsh16x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
(Lsh16x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Lsh16x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
(Lsh16x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))

(Lsh8x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
(Lsh8x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Lsh8x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
(Lsh8x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))

// unsigned right shift: zero-extend a narrow x first so the vacated
// high bits are zero; result is 0 when y >= 64.
(Rsh64Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
(Rsh64Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Rsh64Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
(Rsh64Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))

(Rsh32Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
(Rsh32Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Rsh32Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
(Rsh32Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))

(Rsh16Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
(Rsh16Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Rsh16Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
(Rsh16Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))

(Rsh8Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
(Rsh8Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
(Rsh8Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
(Rsh8Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))

// signed right shift: sign-extend a narrow x first; the shift amount is
// clamped to 63 so large shifts yield all sign bits, per Go semantics.
(Rsh64x64 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
(Rsh64x32 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
(Rsh64x16 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
(Rsh64x8  x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))

(Rsh32x64 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
(Rsh32x32 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
(Rsh32x16 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
(Rsh32x8  x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))

(Rsh16x64 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
(Rsh16x32 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
(Rsh16x16 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
(Rsh16x8  x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))

(Rsh8x64 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
(Rsh8x32 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
(Rsh8x16 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
(Rsh8x8  x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   196  
// constants
(Const(64|32|16|8) [val]) => (MOVDconst [int64(val)])
(Const(32F|64F) [val]) => (FMOV(S|D)const [float64(val)])
(ConstNil) => (MOVDconst [0])
(ConstBool [t]) => (MOVDconst [b2i(t)])

// Slicemask: 0 if x == 0, all ones otherwise (arithmetic shift of -x).
(Slicemask <t> x) => (SRAconst (NEG <t> x) [63])

// truncations
// Because we ignore the high parts of registers, truncates are just copies.
(Trunc16to8 ...) => (Copy ...)
(Trunc32to8 ...) => (Copy ...)
(Trunc32to16 ...) => (Copy ...)
(Trunc64to8 ...) => (Copy ...)
(Trunc64to16 ...) => (Copy ...)
(Trunc64to32 ...) => (Copy ...)

// Zero-/Sign-extensions
(ZeroExt8to16 ...) => (MOVBUreg ...)
(ZeroExt8to32 ...) => (MOVBUreg ...)
(ZeroExt16to32 ...) => (MOVHUreg ...)
(ZeroExt8to64 ...) => (MOVBUreg ...)
(ZeroExt16to64 ...) => (MOVHUreg ...)
(ZeroExt32to64 ...) => (MOVWUreg ...)

(SignExt8to16 ...) => (MOVBreg ...)
(SignExt8to32 ...) => (MOVBreg ...)
(SignExt16to32 ...) => (MOVHreg ...)
(SignExt8to64 ...) => (MOVBreg ...)
(SignExt16to64 ...) => (MOVHreg ...)
(SignExt32to64 ...) => (MOVWreg ...)

// float <=> int conversion
(Cvt32to32F ...) => (SCVTFWS ...)
(Cvt32to64F ...) => (SCVTFWD ...)
(Cvt64to32F ...) => (SCVTFS ...)
(Cvt64to64F ...) => (SCVTFD ...)
(Cvt32Uto32F ...) => (UCVTFWS ...)
(Cvt32Uto64F ...) => (UCVTFWD ...)
(Cvt64Uto32F ...) => (UCVTFS ...)
(Cvt64Uto64F ...) => (UCVTFD ...)
(Cvt32Fto32 ...) => (FCVTZSSW ...)
(Cvt64Fto32 ...) => (FCVTZSDW ...)
(Cvt32Fto64 ...) => (FCVTZSS ...)
(Cvt64Fto64 ...) => (FCVTZSD ...)
(Cvt32Fto32U ...) => (FCVTZUSW ...)
(Cvt64Fto32U ...) => (FCVTZUDW ...)
(Cvt32Fto64U ...) => (FCVTZUS ...)
(Cvt64Fto64U ...) => (FCVTZUD ...)
(Cvt32Fto64F ...) => (FCVTSD ...)
(Cvt64Fto32F ...) => (FCVTDS ...)

(CvtBoolToUint8 ...) => (Copy ...)

(Round32F ...) => (LoweredRound32F ...)
(Round64F ...) => (LoweredRound64F ...)
   253  
// comparisons
// 8/16-bit operands are extended to 32 bits before comparing:
// zero-extend for equality/unsigned order, sign-extend for signed order.
(Eq8 x y)  => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Eq32 x y) => (Equal (CMPW x y))
(Eq64 x y) => (Equal (CMP x y))
(EqPtr x y) => (Equal (CMP x y))
(Eq32F x y) => (Equal (FCMPS x y))
(Eq64F x y) => (Equal (FCMPD x y))

(Neq8 x y)  => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Neq32 x y) => (NotEqual (CMPW x y))
(Neq64 x y) => (NotEqual (CMP x y))
(NeqPtr x y) => (NotEqual (CMP x y))
(Neq32F x y) => (NotEqual (FCMPS x y))
(Neq64F x y) => (NotEqual (FCMPD x y))

(Less8 x y)  => (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
(Less16 x y) => (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
(Less32 x y) => (LessThan (CMPW x y))
(Less64 x y) => (LessThan (CMP x y))

// Set condition flags for floating-point comparisons "x < y"
// and "x <= y". Because if either or both of the operands are
// NaNs, all three of (x < y), (x == y) and (x > y) are false,
// and the ARM Manual says the FCMP instruction sets PSTATE.<N,Z,C,V>
// of this case to (0, 0, 1, 1).
(Less32F x y) => (LessThanF (FCMPS x y))
(Less64F x y) => (LessThanF (FCMPD x y))

// For an unsigned integer x, the following rules are useful when combined
// with a branch:
// 0 <  x  =>  x != 0
// x <= 0  =>  x == 0
// x <  1  =>  x == 0
// 1 <= x  =>  x != 0
(Less(8U|16U|32U|64U) zero:(MOVDconst [0]) x) => (Neq(8|16|32|64) zero x)
(Leq(8U|16U|32U|64U) x zero:(MOVDconst [0]))  => (Eq(8|16|32|64) x zero)
(Less(8U|16U|32U|64U) x (MOVDconst [1])) => (Eq(8|16|32|64) x (MOVDconst [0]))
(Leq(8U|16U|32U|64U) (MOVDconst [1]) x)  => (Neq(8|16|32|64) (MOVDconst [0]) x)

(Less8U x y)  => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Less16U x y) => (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Less32U x y) => (LessThanU (CMPW x y))
(Less64U x y) => (LessThanU (CMP x y))

(Leq8 x y)  => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
(Leq16 x y) => (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
(Leq32 x y) => (LessEqual (CMPW x y))
(Leq64 x y) => (LessEqual (CMP x y))

// Refer to the comments for op Less64F above.
(Leq32F x y) => (LessEqualF (FCMPS x y))
(Leq64F x y) => (LessEqualF (FCMPD x y))

(Leq8U x y)  => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Leq16U x y) => (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Leq32U x y) => (LessEqualU (CMPW x y))
(Leq64U x y) => (LessEqualU (CMP x y))

// Optimize comparison between a floating-point value and 0.0 with "FCMP $(0.0), Fn"
// (swapping the operands requires inverting the flags).
(FCMPS x (FMOVSconst [0])) => (FCMPS0 x)
(FCMPS (FMOVSconst [0]) x) => (InvertFlags (FCMPS0 x))
(FCMPD x (FMOVDconst [0])) => (FCMPD0 x)
(FCMPD (FMOVDconst [0]) x) => (InvertFlags (FCMPD0 x))
   318  
// CSEL needs a flag-generating argument. Synthesize a CMPW if necessary.
(CondSelect x y boolval) && flagArg(boolval) != nil => (CSEL [boolval.Op] x y flagArg(boolval))
(CondSelect x y boolval) && flagArg(boolval) == nil => (CSEL [OpARM64NotEqual] x y (CMPWconst [0] boolval))

// pointer arithmetic: SP-relative offsets that fit in 32 bits fold into
// an address computation; otherwise emit an add.
(OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVDaddr [int32(off)] ptr)
(OffPtr [off] ptr) => (ADDconst [off] ptr)

(Addr {sym} base) => (MOVDaddr {sym} base)
(LocalAddr {sym} base _) => (MOVDaddr {sym} base)

// loads: pick the load width and extension by the type's size and signedness
(Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
(Load <t> ptr mem) && (is8BitInt(t)  && isSigned(t))  => (MOVBload ptr mem)
(Load <t> ptr mem) && (is8BitInt(t)  && !isSigned(t)) => (MOVBUload ptr mem)
(Load <t> ptr mem) && (is16BitInt(t) && isSigned(t))  => (MOVHload ptr mem)
(Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) => (MOVHUload ptr mem)
(Load <t> ptr mem) && (is32BitInt(t) && isSigned(t))  => (MOVWload ptr mem)
(Load <t> ptr mem) && (is32BitInt(t) && !isSigned(t)) => (MOVWUload ptr mem)
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVDload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) => (FMOVSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) => (FMOVDload ptr mem)

// stores: pick the store width by size, and GPR vs FPR by the value's type
(Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVDstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVSstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
   348  
// zeroing
// Small fixed sizes are unrolled into individual zero stores; sizes that
// are not a power of two are covered with overlapping-free store pairs.
(Zero [0] _ mem) => mem
(Zero [1] ptr mem) => (MOVBstore ptr (MOVDconst [0]) mem)
(Zero [2] ptr mem) => (MOVHstore ptr (MOVDconst [0]) mem)
(Zero [4] ptr mem) => (MOVWstore ptr (MOVDconst [0]) mem)
(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst [0]) mem)

(Zero [3] ptr mem) =>
	(MOVBstore [2] ptr (MOVDconst [0])
		(MOVHstore ptr (MOVDconst [0]) mem))
(Zero [5] ptr mem) =>
	(MOVBstore [4] ptr (MOVDconst [0])
		(MOVWstore ptr (MOVDconst [0]) mem))
(Zero [6] ptr mem) =>
	(MOVHstore [4] ptr (MOVDconst [0])
		(MOVWstore ptr (MOVDconst [0]) mem))
(Zero [7] ptr mem) =>
	(MOVBstore [6] ptr (MOVDconst [0])
		(MOVHstore [4] ptr (MOVDconst [0])
			(MOVWstore ptr (MOVDconst [0]) mem)))
(Zero [9] ptr mem) =>
	(MOVBstore [8] ptr (MOVDconst [0])
		(MOVDstore ptr (MOVDconst [0]) mem))
(Zero [10] ptr mem) =>
	(MOVHstore [8] ptr (MOVDconst [0])
		(MOVDstore ptr (MOVDconst [0]) mem))
(Zero [11] ptr mem) =>
	(MOVBstore [10] ptr (MOVDconst [0])
		(MOVHstore [8] ptr (MOVDconst [0])
			(MOVDstore ptr (MOVDconst [0]) mem)))
(Zero [12] ptr mem) =>
	(MOVWstore [8] ptr (MOVDconst [0])
		(MOVDstore ptr (MOVDconst [0]) mem))
(Zero [13] ptr mem) =>
	(MOVBstore [12] ptr (MOVDconst [0])
		(MOVWstore [8] ptr (MOVDconst [0])
			(MOVDstore ptr (MOVDconst [0]) mem)))
(Zero [14] ptr mem) =>
	(MOVHstore [12] ptr (MOVDconst [0])
		(MOVWstore [8] ptr (MOVDconst [0])
			(MOVDstore ptr (MOVDconst [0]) mem)))
(Zero [15] ptr mem) =>
	(MOVBstore [14] ptr (MOVDconst [0])
		(MOVHstore [12] ptr (MOVDconst [0])
			(MOVWstore [8] ptr (MOVDconst [0])
				(MOVDstore ptr (MOVDconst [0]) mem))))
// 16 bytes and up use STP (store-pair of two 8-byte zeros) per 16 bytes.
(Zero [16] ptr mem) =>
	(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)

(Zero [32] ptr mem) =>
	(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
		(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))

(Zero [48] ptr mem) =>
	(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
		(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
			(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))

(Zero [64] ptr mem) =>
	(STP [48] ptr (MOVDconst [0]) (MOVDconst [0])
		(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
			(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
				(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))

// strip off fractional word zeroing
// (the tail <= 8 or <= 16 bytes overlaps the 16-byte-aligned body)
(Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
	(Zero [8]
		(OffPtr <ptr.Type> ptr [s-8])
		(Zero [s-s%16] ptr mem))
(Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 =>
	(Zero [16]
		(OffPtr <ptr.Type> ptr [s-16])
		(Zero [s-s%16] ptr mem))

// medium zeroing uses a duff device
// 4 and 64 are magic constants: 4 is the number of bytes per duffzero
// block, 64 is the number of blocks; see runtime/mkduff.go
(Zero [s] ptr mem)
	&& s%16 == 0 && s > 64 && s <= 16*64
	&& !config.noDuffDevice =>
	(DUFFZERO [4 * (64 - s/16)] ptr mem)

// large zeroing uses a loop
(Zero [s] ptr mem)
	&& s%16 == 0 && (s > 16*64 || config.noDuffDevice) =>
	(LoweredZero
		ptr
		(ADDconst <ptr.Type> [s-16] ptr)
		mem)
   437  
// moves
// Small fixed sizes are unrolled into load/store pairs.
(Move [0] _ _ mem) => mem
(Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem)
(Move [2] dst src mem) => (MOVHstore dst (MOVHUload src mem) mem)
(Move [4] dst src mem) => (MOVWstore dst (MOVWUload src mem) mem)
(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)

(Move [3] dst src mem) =>
	(MOVBstore [2] dst (MOVBUload [2] src mem)
		(MOVHstore dst (MOVHUload src mem) mem))
(Move [5] dst src mem) =>
	(MOVBstore [4] dst (MOVBUload [4] src mem)
		(MOVWstore dst (MOVWUload src mem) mem))
(Move [6] dst src mem) =>
	(MOVHstore [4] dst (MOVHUload [4] src mem)
		(MOVWstore dst (MOVWUload src mem) mem))
(Move [7] dst src mem) =>
	(MOVBstore [6] dst (MOVBUload [6] src mem)
		(MOVHstore [4] dst (MOVHUload [4] src mem)
			(MOVWstore dst (MOVWUload src mem) mem)))
(Move [12] dst src mem) =>
	(MOVWstore [8] dst (MOVWUload [8] src mem)
		(MOVDstore dst (MOVDload src mem) mem))
(Move [16] dst src mem) =>
	(MOVDstore [8] dst (MOVDload [8] src mem)
		(MOVDstore dst (MOVDload src mem) mem))
(Move [24] dst src mem) =>
	(MOVDstore [16] dst (MOVDload [16] src mem)
		(MOVDstore [8] dst (MOVDload [8] src mem)
			(MOVDstore dst (MOVDload src mem) mem)))

// strip off fractional word move
(Move [s] dst src mem) && s%8 != 0 && s > 8 =>
	(Move [s%8]
		(OffPtr <dst.Type> dst [s-s%8])
		(OffPtr <src.Type> src [s-s%8])
		(Move [s-s%8] dst src mem))

// medium move uses a duff device
// (a trailing 8-byte remainder is peeled off first so the duff body
// always copies a multiple of 16 bytes)
(Move [s] dst src mem)
	&& s > 32 && s <= 16*64 && s%16 == 8
	&& !config.noDuffDevice && logLargeCopy(v, s) =>
	(MOVDstore [int32(s-8)] dst (MOVDload [int32(s-8)] src mem)
		(DUFFCOPY <types.TypeMem> [8*(64-(s-8)/16)] dst src mem))
(Move [s] dst src mem)
	&& s > 32 && s <= 16*64 && s%16 == 0
	&& !config.noDuffDevice && logLargeCopy(v, s) =>
	(DUFFCOPY [8 * (64 - s/16)] dst src mem)
// 8 is the number of bytes to encode:
//
// LDP.P   16(R16), (R26, R27)
// STP.P   (R26, R27), 16(R17)
//
// 64 is the number of these blocks. See runtime/duff_arm64.s:duffcopy

// large move uses a loop
(Move [s] dst src mem)
	&& s > 24 && s%8 == 0 && logLargeCopy(v, s) =>
	(LoweredMove
		dst
		src
		(ADDconst <src.Type> src [s-8])
		mem)
   501  
// calls
(StaticCall ...) => (CALLstatic ...)
(ClosureCall ...) => (CALLclosure ...)
(InterCall ...) => (CALLinter ...)
(TailCall ...) => (CALLtail ...)

// checks
(NilCheck ...) => (LoweredNilCheck ...)
(IsNonNil ptr) => (NotEqual (CMPconst [0] ptr))
// bounds checks: index compares are unsigned, so a negative index also fails
(IsInBounds idx len) => (LessThanU (CMP idx len))
(IsSliceInBounds idx len) => (LessEqualU (CMP idx len))

// pseudo-ops
(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
(GetCallerSP ...) => (LoweredGetCallerSP ...)
(GetCallerPC ...) => (LoweredGetCallerPC ...)

// Absorb pseudo-ops into blocks.
(If (Equal cc) yes no) => (EQ cc yes no)
(If (NotEqual cc) yes no) => (NE cc yes no)
(If (LessThan cc) yes no) => (LT cc yes no)
(If (LessThanU cc) yes no) => (ULT cc yes no)
(If (LessEqual cc) yes no) => (LE cc yes no)
(If (LessEqualU cc) yes no) => (ULE cc yes no)
(If (GreaterThan cc) yes no) => (GT cc yes no)
(If (GreaterThanU cc) yes no) => (UGT cc yes no)
(If (GreaterEqual cc) yes no) => (GE cc yes no)
(If (GreaterEqualU cc) yes no) => (UGE cc yes no)
(If (LessThanF cc) yes no) => (FLT cc yes no)
(If (LessEqualF cc) yes no) => (FLE cc yes no)
(If (GreaterThanF cc) yes no) => (FGT cc yes no)
(If (GreaterEqualF cc) yes no) => (FGE cc yes no)

// fallback: branch on the boolean value itself being non-zero
(If cond yes no) => (NZ cond yes no)
   536  
// atomic intrinsics
// Note: these ops do not accept an offset.
(AtomicLoad8   ...) => (LDARB ...)
(AtomicLoad32  ...) => (LDARW ...)
(AtomicLoad64  ...) => (LDAR  ...)
(AtomicLoadPtr ...) => (LDAR  ...)

(AtomicStore8       ...) => (STLRB ...)
(AtomicStore32      ...) => (STLRW ...)
(AtomicStore64      ...) => (STLR  ...)
(AtomicStorePtrNoWB ...) => (STLR  ...)

(AtomicExchange(32|64)       ...) => (LoweredAtomicExchange(32|64) ...)
(AtomicAdd(32|64)            ...) => (LoweredAtomicAdd(32|64) ...)
(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)

// "Variant" forms use the LSE atomic instructions when available.
(AtomicAdd(32|64)Variant            ...) => (LoweredAtomicAdd(32|64)Variant      ...)
(AtomicExchange(32|64)Variant       ...) => (LoweredAtomicExchange(32|64)Variant ...)
(AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant      ...)

// Currently the updated value is not used, but we need a register to temporarily hold it.
(AtomicAnd8  ptr val mem) => (Select1 (LoweredAtomicAnd8  ptr val mem))
(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
(AtomicOr8   ptr val mem) => (Select1 (LoweredAtomicOr8   ptr val mem))
(AtomicOr32  ptr val mem) => (Select1 (LoweredAtomicOr32  ptr val mem))

(AtomicAnd8Variant  ptr val mem) => (Select1 (LoweredAtomicAnd8Variant  ptr val mem))
(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem))
(AtomicOr8Variant   ptr val mem) => (Select1 (LoweredAtomicOr8Variant   ptr val mem))
(AtomicOr32Variant  ptr val mem) => (Select1 (LoweredAtomicOr32Variant  ptr val mem))

// Write barrier.
(WB ...) => (LoweredWB ...)

// Publication barrier (0xe is the ST option of the DMB instruction)
(PubBarrier mem) => (DMB [0xe] mem)

// bounds-check panics: boundsABI selects which panic entry point's
// argument registers match this kind
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
   577  
// Optimizations

// Absorb boolean tests into block
(NZ (Equal cc) yes no) => (EQ cc yes no)
(NZ (NotEqual cc) yes no) => (NE cc yes no)
(NZ (LessThan cc) yes no) => (LT cc yes no)
(NZ (LessThanU cc) yes no) => (ULT cc yes no)
(NZ (LessEqual cc) yes no) => (LE cc yes no)
(NZ (LessEqualU cc) yes no) => (ULE cc yes no)
(NZ (GreaterThan cc) yes no) => (GT cc yes no)
(NZ (GreaterThanU cc) yes no) => (UGT cc yes no)
(NZ (GreaterEqual cc) yes no) => (GE cc yes no)
(NZ (GreaterEqualU cc) yes no) => (UGE cc yes no)
(NZ (LessThanF cc) yes no) => (FLT cc yes no)
(NZ (LessEqualF cc) yes no) => (FLE cc yes no)
(NZ (GreaterThanF cc) yes no) => (FGT cc yes no)
(NZ (GreaterEqualF cc) yes no) => (FGE cc yes no)

// Fold a compare-with-zero of an AND into a TST (test-bits) instruction,
// when the AND result has no other use.
(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTWconst [int32(c)] y) yes no)
(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTWconst [int32(c)] y) yes no)
(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTWconst [int32(c)] y) yes no)
(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTWconst [int32(c)] y) yes no)
(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTWconst [int32(c)] y) yes no)
(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTWconst [int32(c)] y) yes no)

(EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TST x y) yes no)
(NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TST x y) yes no)
(LT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TST x y) yes no)
(LE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TST x y) yes no)
(GT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TST x y) yes no)
(GE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TST x y) yes no)

(EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TSTW x y) yes no)
(NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TSTW x y) yes no)
(LT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TSTW x y) yes no)
(LE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TSTW x y) yes no)
   614  (GT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TSTW x y) yes no)
   615  (GE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TSTW x y) yes no)
   616  
   617  (EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTconst [c] y) yes no)
   618  (NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTconst [c] y) yes no)
   619  (LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTconst [c] y) yes no)
   620  (LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTconst [c] y) yes no)
   621  (GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTconst [c] y) yes no)
   622  (GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTconst [c] y) yes no)
   623  
   624  (EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNconst [c] y) yes no)
   625  (NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNconst [c] y) yes no)
   626  (LT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNconst [c] y) yes no)
   627  (LE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNconst [c] y) yes no)
   628  (GT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNconst [c] y) yes no)
   629  (GE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNconst [c] y) yes no)
   630  
   631  (EQ (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNWconst [int32(c)] y) yes no)
   632  (NE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNWconst [int32(c)] y) yes no)
   633  (LT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNWconst [int32(c)] y) yes no)
   634  (LE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNWconst [int32(c)] y) yes no)
   635  (GT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNWconst [int32(c)] y) yes no)
   636  (GE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNWconst [int32(c)] y) yes no)
   637  
   638  (EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no)
   639  (NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no)
   640  (LT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMN x y) yes no)
   641  (LE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMN x y) yes no)
   642  (GT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMN x y) yes no)
   643  (GE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMN x y) yes no)
   644  
   645  (EQ (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no)
   646  (NE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no)
   647  (LT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMNW x y) yes no)
   648  (LE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMNW x y) yes no)
   649  (GT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMNW x y) yes no)
   650  (GE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMNW x y) yes no)
   651  
   652  // CMP(x,-y) -> CMN(x,y) is only valid for unordered comparison, if y can be -1<<63
   653  (EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no)
   654  (NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no)
   655  
   656  // CMPW(x,-y) -> CMNW(x,y) is only valid for unordered comparison, if y can be -1<<31
   657  (EQ (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no)
   658  (NE (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no)
   659  
   660  (EQ (CMPconst [0] x) yes no) => (Z x yes no)
   661  (NE (CMPconst [0] x) yes no) => (NZ x yes no)
   662  (EQ (CMPWconst [0] x) yes no) => (ZW x yes no)
   663  (NE (CMPWconst [0] x) yes no) => (NZW x yes no)
   664  
   665  (EQ (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (EQ (CMN a (MUL <x.Type> x y)) yes no)
   666  (NE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (NE (CMN a (MUL <x.Type> x y)) yes no)
   667  (LT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (LTnoov (CMN a (MUL <x.Type> x y)) yes no)
   668  (LE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (LEnoov (CMN a (MUL <x.Type> x y)) yes no)
   669  (GT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (GTnoov (CMN a (MUL <x.Type> x y)) yes no)
   670  (GE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (GEnoov (CMN a (MUL <x.Type> x y)) yes no)
   671  
   672  (EQ (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (EQ (CMP a (MUL <x.Type> x y)) yes no)
   673  (NE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (NE (CMP a (MUL <x.Type> x y)) yes no)
   674  (LE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (LEnoov (CMP a (MUL <x.Type> x y)) yes no)
   675  (LT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (LTnoov (CMP a (MUL <x.Type> x y)) yes no)
   676  (GE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (GEnoov (CMP a (MUL <x.Type> x y)) yes no)
   677  (GT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (GTnoov (CMP a (MUL <x.Type> x y)) yes no)
   678  
   679  (EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (EQ (CMNW a (MULW <x.Type> x y)) yes no)
   680  (NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (NE (CMNW a (MULW <x.Type> x y)) yes no)
   681  (LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LEnoov (CMNW a (MULW <x.Type> x y)) yes no)
   682  (LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LTnoov (CMNW a (MULW <x.Type> x y)) yes no)
   683  (GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GEnoov (CMNW a (MULW <x.Type> x y)) yes no)
   684  (GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GTnoov (CMNW a (MULW <x.Type> x y)) yes no)
   685  
   686  (EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (EQ (CMPW a (MULW <x.Type> x y)) yes no)
   687  (NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (NE (CMPW a (MULW <x.Type> x y)) yes no)
   688  (LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LEnoov (CMPW a (MULW <x.Type> x y)) yes no)
   689  (LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LTnoov (CMPW a (MULW <x.Type> x y)) yes no)
   690  (GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GEnoov (CMPW a (MULW <x.Type> x y)) yes no)
   691  (GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GTnoov (CMPW a (MULW <x.Type> x y)) yes no)
   692  
   693  // Absorb bit-tests into block
   694  (Z  (ANDconst [c] x) yes no) && oneBit(c) => (TBZ  [int64(ntz64(c))] x yes no)
   695  (NZ (ANDconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no)
   696  (ZW  (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ  [int64(ntz64(int64(uint32(c))))] x yes no)
   697  (NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
   698  (EQ (TSTconst [c] x) yes no) && oneBit(c) => (TBZ  [int64(ntz64(c))] x yes no)
   699  (NE (TSTconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no)
   700  (EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ  [int64(ntz64(int64(uint32(c))))] x yes no)
   701  (NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
   702  
   703  // Test sign-bit for signed comparisons against zero
   704  (GE (CMPWconst [0] x) yes no) => (TBZ  [31] x yes no)
   705  (GE (CMPconst [0] x) yes no) => (TBZ  [63] x yes no)
   706  (LT (CMPWconst [0] x) yes no) => (TBNZ  [31] x yes no)
   707  (LT (CMPconst [0] x) yes no) => (TBNZ  [63] x yes no)
   708  
   709  // fold offset into address
   710  (ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) =>
   711  	 (MOVDaddr [int32(off1)+off2] {sym} ptr)
   712  
   713  // fold address into load/store
   714  (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   715  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   716  	(MOVBload [off1+int32(off2)] {sym} ptr mem)
   717  (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   718  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   719  	(MOVBUload [off1+int32(off2)] {sym} ptr mem)
   720  (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   721  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   722  	(MOVHload [off1+int32(off2)] {sym} ptr mem)
   723  (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   724  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   725  	(MOVHUload [off1+int32(off2)] {sym} ptr mem)
   726  (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   727  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   728  	(MOVWload [off1+int32(off2)] {sym} ptr mem)
   729  (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   730  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   731  	(MOVWUload [off1+int32(off2)] {sym} ptr mem)
   732  (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   733  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   734  	(MOVDload [off1+int32(off2)] {sym} ptr mem)
   735  (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   736  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   737  	(FMOVSload [off1+int32(off2)] {sym} ptr mem)
   738  (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   739  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   740  	(FMOVDload [off1+int32(off2)] {sym} ptr mem)
   741  
   742  // register indexed load
   743  (MOVDload  [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx ptr idx mem)
   744  (MOVWUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWUloadidx ptr idx mem)
   745  (MOVWload  [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWloadidx ptr idx mem)
   746  (MOVHUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHUloadidx ptr idx mem)
   747  (MOVHload  [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHloadidx ptr idx mem)
   748  (MOVBUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBUloadidx ptr idx mem)
   749  (MOVBload  [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBloadidx ptr idx mem)
   750  (FMOVSload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx ptr idx mem)
   751  (FMOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx ptr idx mem)
   752  (MOVDloadidx  ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDload  [int32(c)] ptr mem)
   753  (MOVDloadidx  (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVDload  [int32(c)] ptr mem)
   754  (MOVWUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem)
   755  (MOVWUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem)
   756  (MOVWloadidx  ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWload  [int32(c)] ptr mem)
   757  (MOVWloadidx  (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVWload  [int32(c)] ptr mem)
   758  (MOVHUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHUload [int32(c)] ptr mem)
   759  (MOVHUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVHUload [int32(c)] ptr mem)
   760  (MOVHloadidx  ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHload  [int32(c)] ptr mem)
   761  (MOVHloadidx  (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVHload  [int32(c)] ptr mem)
   762  (MOVBUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBUload [int32(c)] ptr mem)
   763  (MOVBUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVBUload [int32(c)] ptr mem)
   764  (MOVBloadidx  ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBload  [int32(c)] ptr mem)
   765  (MOVBloadidx  (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVBload  [int32(c)] ptr mem)
   766  (FMOVSloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (FMOVSload [int32(c)] ptr mem)
   767  (FMOVSloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (FMOVSload [int32(c)] ptr mem)
   768  (FMOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (FMOVDload [int32(c)] ptr mem)
   769  (FMOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (FMOVDload [int32(c)] ptr mem)
   770  
   771  // shifted register indexed load
   772  (MOVDload  [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx8 ptr idx mem)
   773  (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWUloadidx4 ptr idx mem)
   774  (MOVWload  [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWloadidx4 ptr idx mem)
   775  (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHUloadidx2 ptr idx mem)
   776  (MOVHload  [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHloadidx2 ptr idx mem)
   777  (MOVDloadidx  ptr (SLLconst [3] idx) mem) => (MOVDloadidx8 ptr idx mem)
   778  (MOVWloadidx  ptr (SLLconst [2] idx) mem) => (MOVWloadidx4 ptr idx mem)
   779  (MOVWUloadidx ptr (SLLconst [2] idx) mem) => (MOVWUloadidx4 ptr idx mem)
   780  (MOVHloadidx  ptr (SLLconst [1] idx) mem) => (MOVHloadidx2 ptr idx mem)
   781  (MOVHUloadidx ptr (SLLconst [1] idx) mem) => (MOVHUloadidx2 ptr idx mem)
   782  (MOVHloadidx  ptr (ADD idx idx) mem) => (MOVHloadidx2 ptr idx mem)
   783  (MOVHUloadidx ptr (ADD idx idx) mem) => (MOVHUloadidx2 ptr idx mem)
   784  (MOVDloadidx  (SLLconst [3] idx) ptr mem) => (MOVDloadidx8 ptr idx mem)
   785  (MOVWloadidx  (SLLconst [2] idx) ptr mem) => (MOVWloadidx4 ptr idx mem)
   786  (MOVWUloadidx (SLLconst [2] idx) ptr mem) => (MOVWUloadidx4 ptr idx mem)
   787  (MOVHloadidx  (ADD idx idx) ptr mem) => (MOVHloadidx2 ptr idx mem)
   788  (MOVHUloadidx (ADD idx idx) ptr mem) => (MOVHUloadidx2 ptr idx mem)
   789  (MOVDloadidx8  ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (MOVDload  [int32(c)<<3] ptr mem)
   790  (MOVWUloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWUload [int32(c)<<2] ptr mem)
   791  (MOVWloadidx4  ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWload  [int32(c)<<2] ptr mem)
   792  (MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHUload [int32(c)<<1] ptr mem)
   793  (MOVHloadidx2  ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHload  [int32(c)<<1] ptr mem)
   794  
   795  (FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx8 ptr idx mem)
   796  (FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx4 ptr idx mem)
   797  (FMOVDloadidx ptr (SLLconst [3] idx) mem) => (FMOVDloadidx8 ptr idx mem)
   798  (FMOVSloadidx ptr (SLLconst [2] idx) mem) => (FMOVSloadidx4 ptr idx mem)
   799  (FMOVDloadidx (SLLconst [3] idx) ptr mem) => (FMOVDloadidx8 ptr idx mem)
   800  (FMOVSloadidx (SLLconst [2] idx) ptr mem) => (FMOVSloadidx4 ptr idx mem)
   801  (FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem)
   802  (FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem)
   803  
   804  (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
   805  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   806  	(MOVBstore [off1+int32(off2)] {sym} ptr val mem)
   807  (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
   808  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   809  	(MOVHstore [off1+int32(off2)] {sym} ptr val mem)
   810  (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
   811  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   812  	(MOVWstore [off1+int32(off2)] {sym} ptr val mem)
   813  (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
   814  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   815  	(MOVDstore [off1+int32(off2)] {sym} ptr val mem)
   816  (STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2)
   817  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   818  	(STP [off1+int32(off2)] {sym} ptr val1 val2 mem)
   819  (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
   820  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   821  	(FMOVSstore [off1+int32(off2)] {sym} ptr val mem)
   822  (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
   823  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   824  	(FMOVDstore [off1+int32(off2)] {sym} ptr val mem)
   825  (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   826  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   827  	(MOVBstorezero [off1+int32(off2)] {sym} ptr mem)
   828  (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   829  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   830  	(MOVHstorezero [off1+int32(off2)] {sym} ptr mem)
   831  (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   832  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   833  	(MOVWstorezero [off1+int32(off2)] {sym} ptr mem)
   834  (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   835  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   836  	(MOVDstorezero [off1+int32(off2)] {sym} ptr mem)
   837  (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
   838  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   839  	(MOVQstorezero [off1+int32(off2)] {sym} ptr mem)
   840  
   841  // register indexed store
   842  (MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem)
   843  (MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx ptr idx val mem)
   844  (MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx ptr idx val mem)
   845  (MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVBstoreidx ptr idx val mem)
   846  (FMOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx ptr idx val mem)
   847  (FMOVSstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx ptr idx val mem)
   848  (MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVDstore [int32(c)] ptr val mem)
   849  (MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVDstore [int32(c)] idx val mem)
   850  (MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVWstore [int32(c)] ptr val mem)
   851  (MOVWstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVWstore [int32(c)] idx val mem)
   852  (MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVHstore [int32(c)] ptr val mem)
   853  (MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVHstore [int32(c)] idx val mem)
   854  (MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVBstore [int32(c)] ptr val mem)
   855  (MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVBstore [int32(c)] idx val mem)
   856  (FMOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVDstore [int32(c)] ptr val mem)
   857  (FMOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVDstore [int32(c)] idx val mem)
   858  (FMOVSstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVSstore [int32(c)] ptr val mem)
   859  (FMOVSstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVSstore [int32(c)] idx val mem)
   860  
   861  // shifted register indexed store
   862  (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx8 ptr idx val mem)
   863  (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx4 ptr idx val mem)
   864  (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx2 ptr idx val mem)
   865  (MOVDstoreidx ptr (SLLconst [3] idx) val mem) => (MOVDstoreidx8 ptr idx val mem)
   866  (MOVWstoreidx ptr (SLLconst [2] idx) val mem) => (MOVWstoreidx4 ptr idx val mem)
   867  (MOVHstoreidx ptr (SLLconst [1] idx) val mem) => (MOVHstoreidx2 ptr idx val mem)
   868  (MOVHstoreidx ptr (ADD idx idx) val mem) => (MOVHstoreidx2 ptr idx val mem)
   869  (MOVDstoreidx (SLLconst [3] idx) ptr val mem) => (MOVDstoreidx8 ptr idx val mem)
   870  (MOVWstoreidx (SLLconst [2] idx) ptr val mem) => (MOVWstoreidx4 ptr idx val mem)
   871  (MOVHstoreidx (SLLconst [1] idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem)
   872  (MOVHstoreidx (ADD idx idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem)
   873  (MOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (MOVDstore [int32(c)<<3] ptr val mem)
   874  (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem)
   875  (MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem)
   876  
   877  (FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx8 ptr idx val mem)
   878  (FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx4 ptr idx val mem)
   879  (FMOVDstoreidx ptr (SLLconst [3] idx) val mem) => (FMOVDstoreidx8 ptr idx val mem)
   880  (FMOVSstoreidx ptr (SLLconst [2] idx) val mem) => (FMOVSstoreidx4 ptr idx val mem)
   881  (FMOVDstoreidx (SLLconst [3] idx) ptr val mem) => (FMOVDstoreidx8 ptr idx val mem)
   882  (FMOVSstoreidx (SLLconst [2] idx) ptr val mem) => (FMOVSstoreidx4 ptr idx val mem)
   883  (FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (FMOVDstore [int32(c)<<3] ptr val mem)
   884  (FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem)
   885  
   886  (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   887  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   888  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   889  	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   890  (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   891  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   892  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   893  	(MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   894  (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   895  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   896  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   897  	(MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   898  (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   899  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   900  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   901  	(MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   902  (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   903  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   904  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   905  	(MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   906  (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   907  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   908  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   909  	(MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   910  (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   911  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   912  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   913  	(MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   914  (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   915  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   916  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   917  	(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   918  (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   919  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   920  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   921  	(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   922  
   923  (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   924  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   925  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   926  	(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   927  (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   928  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   929  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   930  	(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   931  (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   932  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   933  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   934  	(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   935  (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   936  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   937  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   938  	(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   939  (STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
   940  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   941  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   942  	(STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
   943  (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   944  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   945  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   946  	(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   947  (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   948  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   949  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   950  	(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   951  (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   952  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   953  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   954  	(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   955  (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   956  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   957  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   958  	(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   959  (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   960  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   961  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   962  	(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   963  (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   964  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   965  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   966  	(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   967  (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   968  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
   969  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
   970  	(MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   971  
   972  // store zero
   973  (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem)
(MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem)
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem)
(STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) => (MOVQstorezero [off] {sym} ptr mem) // a pair of zero stores becomes one quadword store zero

// register indexed store zero
(MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDstorezeroidx ptr idx mem)
(MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWstorezeroidx ptr idx mem)
(MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHstorezeroidx ptr idx mem)
(MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBstorezeroidx ptr idx mem)
// store of a zero constant through a register index
(MOVDstoreidx ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx ptr idx mem)
(MOVWstoreidx ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx ptr idx mem)
(MOVHstoreidx ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx ptr idx mem)
(MOVBstoreidx ptr idx (MOVDconst [0]) mem) => (MOVBstorezeroidx ptr idx mem)
// fold a constant address component (in either argument position) into the store-zero offset
(MOVDstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDstorezero [int32(c)] ptr mem)
(MOVDstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVDstorezero [int32(c)] idx mem)
(MOVWstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWstorezero [int32(c)] ptr mem)
(MOVWstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVWstorezero [int32(c)] idx mem)
(MOVHstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHstorezero [int32(c)] ptr mem)
(MOVHstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVHstorezero [int32(c)] idx mem)
(MOVBstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBstorezero [int32(c)] ptr mem)
(MOVBstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVBstorezero [int32(c)] idx mem)

// shifted register indexed store zero
(MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx ptr (SLLconst [3] idx) mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezeroidx ptr (SLLconst [2] idx) mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezeroidx ptr (SLLconst [1] idx) mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx (SLLconst [3] idx) ptr mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezeroidx (SLLconst [2] idx) ptr mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezeroidx (SLLconst [1] idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem)
// store of a zero constant through a shifted index
(MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx2 ptr idx mem)
// fold a constant index into the offset, applying the scale
(MOVDstorezeroidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (MOVDstorezero [int32(c<<3)] ptr mem)
(MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWstorezero [int32(c<<2)] ptr mem)
(MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHstorezero [int32(c<<1)] ptr mem)
  1015  
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// these seem to have bad interaction with other rules, resulting in slower code
//(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
//(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x)
//(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x)
//(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x)
//(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x)
//(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x)
//(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
//(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
//(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x

// a load that sees a preceding store of zero at the same location is the constant zero
(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])

// same, for register-indexed forms; pointer and index may appear in either order
(MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])

// same, for shifted-index forms
(MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
  1056  
// don't extend after proper load
(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
// same, for register-indexed loads
(MOVBreg x:(MOVBloadidx _  _ _)) => (MOVDreg x)
(MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x)
// same, for shifted-index loads
(MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVWloadidx4 _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUloadidx4 _ _ _)) => (MOVDreg x)
  1095  
// fold double extensions
// (the inner extension is no wider than the outer one, so one extension suffices)
(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
  1111  
// don't extend before store
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
// same, for register-indexed stores
(MOVBstoreidx ptr idx (MOVBreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVBUreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVHreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVHUreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVWreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVWUreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVHreg x) mem) => (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVHUreg x) mem) => (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVWreg x) mem) => (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVWUreg x) mem) => (MOVHstoreidx ptr idx x mem)
(MOVWstoreidx ptr idx (MOVWreg x) mem) => (MOVWstoreidx ptr idx x mem)
(MOVWstoreidx ptr idx (MOVWUreg x) mem) => (MOVWstoreidx ptr idx x mem)
// same, for shifted-index stores
(MOVHstoreidx2 ptr idx (MOVHreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVHUreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVWreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVWUreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
(MOVWstoreidx4 ptr idx (MOVWreg x) mem) => (MOVWstoreidx4 ptr idx x mem)
(MOVWstoreidx4 ptr idx (MOVWUreg x) mem) => (MOVWstoreidx4 ptr idx x mem)
  1143  
// if a register move has only 1 use, just use the same register without emitting instruction
// MOVDnop doesn't emit an instruction; it exists only to ensure the type.
(MOVDreg x) && x.Uses == 1 => (MOVDnop x)

// TODO: we should be able to get rid of MOVDnop all together.
// But for now, this is enough to get rid of lots of them.
(MOVDnop (MOVDconst [c])) => (MOVDconst [c])
  1151  
// fold constant into arithmetic ops
(ADD x (MOVDconst [c])) => (ADDconst [c] x)
(SUB x (MOVDconst [c])) => (SUBconst [c] x)
(AND x (MOVDconst [c])) => (ANDconst [c] x)
(OR  x (MOVDconst [c])) => (ORconst  [c] x)
(XOR x (MOVDconst [c])) => (XORconst [c] x)
(TST x (MOVDconst [c])) => (TSTconst [c] x)
(TSTW x (MOVDconst [c])) => (TSTWconst [int32(c)] x)
(CMN x (MOVDconst [c])) => (CMNconst [c] x)
(CMNW x (MOVDconst [c])) => (CMNWconst [int32(c)] x)
// complementing ops fold the complement into the constant
(BIC x (MOVDconst [c])) => (ANDconst [^c] x)
(EON x (MOVDconst [c])) => (XORconst [^c] x)
(ORN x (MOVDconst [c])) => (ORconst  [^c] x)

(SLL x (MOVDconst [c])) => (SLLconst x [c&63]) // Note: I don't think we ever generate bad constant shifts (i.e. c>=64)
(SRL x (MOVDconst [c])) => (SRLconst x [c&63])
(SRA x (MOVDconst [c])) => (SRAconst x [c&63])

(CMP x (MOVDconst [c])) => (CMPconst [c] x)
(CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x))
(CMPW x (MOVDconst [c])) => (CMPWconst [int32(c)] x)
(CMPW (MOVDconst [c]) x) => (InvertFlags (CMPWconst [int32(c)] x))

(ROR x (MOVDconst [c])) => (RORconst x [c&63])
(RORW x (MOVDconst [c])) => (RORWconst x [c&31])
  1177  
// Canonicalize the order of arguments to comparisons - helps with CSE.
((CMP|CMPW) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW) y x))

// mul-neg => mneg
(NEG (MUL x y)) => (MNEG x y)
(NEG (MULW x y)) => (MNEGW x y)
(MUL (NEG x) y) => (MNEG x y)
(MULW (NEG x) y) => (MNEGW x y)

// madd/msub
// fold a single-use multiply into a multiply-add/subtract
(ADD a l:(MUL  x y)) && l.Uses==1 && clobber(l) => (MADD a x y)
(SUB a l:(MUL  x y)) && l.Uses==1 && clobber(l) => (MSUB a x y)
(ADD a l:(MNEG x y)) && l.Uses==1 && clobber(l) => (MSUB a x y)
(SUB a l:(MNEG x y)) && l.Uses==1 && clobber(l) => (MADD a x y)

// 32-bit forms apply only when the result is not used at full 64-bit width
(ADD a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) => (MADDW a x y)
(SUB a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) => (MSUBW a x y)
(ADD a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) => (MSUBW a x y)
(SUB a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) => (MADDW a x y)

// optimize ADCSflags, SBCSflags and friends
(ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (ADCzerocarry <typ.UInt64> c)))) => (ADCSflags x y c)
(ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (MOVDconst [0])))) => (ADDSflags x y)
(SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> bo))))) => (SBCSflags x y bo)
(SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (MOVDconst [0])))) => (SUBSflags x y)
  1203  
// mul by constant
(MUL x (MOVDconst [-1])) => (NEG x)
(MUL _ (MOVDconst [0])) => (MOVDconst [0])
(MUL x (MOVDconst [1])) => x
(MUL x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log64(c)] x)
(MUL x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (ADDshiftLL x x [log64(c-1)]) // c == 2^n + 1: x*c = x + x<<n
(MUL x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)]) // c == 2^n - 1: x*c = x<<n - x
(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))

// 32-bit multiply by constant
(MULW x (MOVDconst [c])) && int32(c)==-1 => (NEG x)
(MULW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
(MULW x (MOVDconst [c])) && int32(c)==1 => x
(MULW x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log64(c)] x)
(MULW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (ADDshiftLL x x [log64(c-1)])
(MULW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)])
(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))

// mneg by constant
(MNEG x (MOVDconst [-1])) => x
(MNEG _ (MOVDconst [0])) => (MOVDconst [0])
(MNEG x (MOVDconst [1])) => (NEG x)
(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log64(c)] x))
(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (NEG (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log64(c+1)]))
(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst <x.Type> [log64(c/3)] (SUBshiftLL <x.Type> x x [2]))
(MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (NEG (SLLconst <x.Type> [log64(c/5)] (ADDshiftLL <x.Type> x x [2])))
(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst <x.Type> [log64(c/7)] (SUBshiftLL <x.Type> x x [3]))
(MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (NEG (SLLconst <x.Type> [log64(c/9)] (ADDshiftLL <x.Type> x x [3])))


// 32-bit mneg by constant
(MNEGW x (MOVDconst [c])) && int32(c)==-1 => x
(MNEGW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
(MNEGW x (MOVDconst [c])) && int32(c)==1 => (NEG x)
(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log64(c)] x))
(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (NEG (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log64(c+1)]))
(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst <x.Type> [log64(c/3)] (SUBshiftLL <x.Type> x x [2]))
(MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (NEG (SLLconst <x.Type> [log64(c/5)] (ADDshiftLL <x.Type> x x [2])))
(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst <x.Type> [log64(c/7)] (SUBshiftLL <x.Type> x x [3]))
(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (NEG (SLLconst <x.Type> [log64(c/9)] (ADDshiftLL <x.Type> x x [3])))
  1250  
  1251  
// madd with constant multiplier: strength-reduce like mul by constant
(MADD a x (MOVDconst [-1])) => (SUB a x)
(MADD a _ (MOVDconst [0])) => a
(MADD a x (MOVDconst [1])) => (ADD a x)
(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

// same, with the constant in the other multiplicand position
(MADD a (MOVDconst [-1]) x) => (SUB a x)
(MADD a (MOVDconst [0]) _) => a
(MADD a (MOVDconst [1]) x) => (ADD a x)
(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

// 32-bit madd with constant multiplier
(MADDW a x (MOVDconst [c])) && int32(c)==-1 => (SUB a x)
(MADDW a _ (MOVDconst [c])) && int32(c)==0 => a
(MADDW a x (MOVDconst [c])) && int32(c)==1 => (ADD a x)
(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

// same, with the constant in the other multiplicand position
(MADDW a (MOVDconst [c]) x) && int32(c)==-1 => (SUB a x)
(MADDW a (MOVDconst [c]) _) && int32(c)==0 => a
(MADDW a (MOVDconst [c]) x) && int32(c)==1 => (ADD a x)
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

// msub with constant multiplier
(MSUB a x (MOVDconst [-1])) => (ADD a x)
(MSUB a _ (MOVDconst [0])) => a
(MSUB a x (MOVDconst [1])) => (SUB a x)
(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

// same, with the constant in the other multiplicand position
(MSUB a (MOVDconst [-1]) x) => (ADD a x)
(MSUB a (MOVDconst [0]) _) => a
(MSUB a (MOVDconst [1]) x) => (SUB a x)
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

// 32-bit msub with constant multiplier
(MSUBW a x (MOVDconst [c])) && int32(c)==-1 => (ADD a x)
(MSUBW a _ (MOVDconst [c])) && int32(c)==0 => a
(MSUBW a x (MOVDconst [c])) && int32(c)==1 => (SUB a x)
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

// same, with the constant in the other multiplicand position
(MSUBW a (MOVDconst [c]) x) && int32(c)==-1 => (ADD a x)
(MSUBW a (MOVDconst [c]) _) && int32(c)==0 => a
(MSUBW a (MOVDconst [c]) x) && int32(c)==1 => (SUB a x)
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1339  
// div by constant
(UDIV x (MOVDconst [1])) => x
(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x) // x / 2^n = x >> n (unsigned)
(UDIVW x (MOVDconst [c])) && uint32(c)==1 => x
(UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log64(c)] x)
(UMOD _ (MOVDconst [1])) => (MOVDconst [0])
(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x) // x % 2^n = x & (2^n - 1) (unsigned)
(UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0])
(UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x)
  1349  
// generic simplifications
(ADD x (NEG y)) => (SUB x y)
(SUB x x) => (MOVDconst [0])
(AND x x) => x
(OR  x x) => x
(XOR x x) => (MOVDconst [0])
(BIC x x) => (MOVDconst [0])
(EON x x) => (MOVDconst [-1])
(ORN x x) => (MOVDconst [-1])
// fold a complemented operand into the combined op
(AND x (MVN y)) => (BIC x y)
(XOR x (MVN y)) => (EON x y)
(OR  x (MVN y)) => (ORN x y)
(MVN (XOR x y)) => (EON x y) // ^(x^y) = x EON y
(NEG (NEG x)) => x
  1364  
// conditional select: fold constant/incremented/inverted/negated arms
// into the specialized CSETM/CSEL0/CSINC/CSINV/CSNEG forms
(CSEL [cc] (MOVDconst [-1]) (MOVDconst [0]) flag) => (CSETM [cc] flag)
(CSEL [cc] (MOVDconst [0]) (MOVDconst [-1]) flag) => (CSETM [arm64Negate(cc)] flag)
(CSEL [cc] x (MOVDconst [0]) flag) => (CSEL0 [cc] x flag)
(CSEL [cc] (MOVDconst [0]) y flag) => (CSEL0 [arm64Negate(cc)] y flag)
(CSEL [cc] x (ADDconst [1] a) flag) => (CSINC [cc] x a flag)
(CSEL [cc] (ADDconst [1] a) x flag) => (CSINC [arm64Negate(cc)] x a flag)
(CSEL [cc] x (MVN a) flag) => (CSINV [cc] x a flag)
(CSEL [cc] (MVN a) x flag) => (CSINV [arm64Negate(cc)] x a flag)
(CSEL [cc] x (NEG a) flag) => (CSNEG [cc] x a flag)
(CSEL [cc] (NEG a) x flag) => (CSNEG [arm64Negate(cc)] x a flag)
  1375  
// re-associate subtractions
(SUB x (SUB y z)) => (SUB (ADD <v.Type> x z) y)
(SUB (SUB x y) z) => (SUB x (ADD <y.Type> y z))

// remove redundant *const ops
(ADDconst [0]  x) => x
(SUBconst [0]  x) => x
(ANDconst [0]  _) => (MOVDconst [0])
(ANDconst [-1] x) => x
(ORconst  [0]  x) => x
(ORconst  [-1] _) => (MOVDconst [-1])
(XORconst [0]  x) => x
(XORconst [-1] x) => (MVN x)
  1388  
// generic constant folding
(ADDconst [c] (MOVDconst [d]))  => (MOVDconst [c+d])
(ADDconst [c] (ADDconst [d] x)) => (ADDconst [c+d] x)
(ADDconst [c] (SUBconst [d] x)) => (ADDconst [c-d] x)
(SUBconst [c] (MOVDconst [d]))  => (MOVDconst [d-c])
(SUBconst [c] (SUBconst [d] x)) => (ADDconst [-c-d] x)
(SUBconst [c] (ADDconst [d] x)) => (ADDconst [-c+d] x)
(SLLconst [c] (MOVDconst [d]))  => (MOVDconst [d<<uint64(c)])
(SRLconst [c] (MOVDconst [d]))  => (MOVDconst [int64(uint64(d)>>uint64(c))])
(SRAconst [c] (MOVDconst [d]))  => (MOVDconst [d>>uint64(c)])
(MUL   (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c*d])
(MULW  (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [int64(int32(c)*int32(d))])
(MNEG  (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [-c*d])
(MNEGW (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [-int64(int32(c)*int32(d))])
(MADD  (MOVDconst [c]) x y) => (ADDconst [c] (MUL   <x.Type> x y))
(MADDW (MOVDconst [c]) x y) => (ADDconst [c] (MULW  <x.Type> x y))
(MSUB  (MOVDconst [c]) x y) => (ADDconst [c] (MNEG  <x.Type> x y))
(MSUBW (MOVDconst [c]) x y) => (ADDconst [c] (MNEGW <x.Type> x y))
(MADD  a (MOVDconst [c]) (MOVDconst [d])) => (ADDconst [c*d] a)
(MADDW a (MOVDconst [c]) (MOVDconst [d])) => (ADDconst [int64(int32(c)*int32(d))] a)
(MSUB  a (MOVDconst [c]) (MOVDconst [d])) => (SUBconst [c*d] a)
(MSUBW a (MOVDconst [c]) (MOVDconst [d])) => (SUBconst [int64(int32(c)*int32(d))] a)
// division folds only when the divisor is a nonzero constant
(DIV   (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [c/d])
(UDIV  (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint64(c)/uint64(d))])
(DIVW  (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(int32(c)/int32(d))])
(UDIVW (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint32(c)/uint32(d))])
(MOD   (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [c%d])
(UMOD  (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint64(c)%uint64(d))])
(MODW  (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(int32(c)%int32(d))])
(UMODW (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint32(c)%uint32(d))])
(ANDconst [c] (MOVDconst [d]))  => (MOVDconst [c&d])
(ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x)
// an unsigned extension combined with a mask just narrows the mask
(ANDconst [c] (MOVWUreg x)) => (ANDconst [c&(1<<32-1)] x)
(ANDconst [c] (MOVHUreg x)) => (ANDconst [c&(1<<16-1)] x)
(ANDconst [c] (MOVBUreg x)) => (ANDconst [c&(1<<8-1)] x)
(MOVWUreg (ANDconst [c] x)) => (ANDconst [c&(1<<32-1)] x)
(MOVHUreg (ANDconst [c] x)) => (ANDconst [c&(1<<16-1)] x)
(MOVBUreg (ANDconst [c] x)) => (ANDconst [c&(1<<8-1)] x)
(ORconst  [c] (MOVDconst [d]))  => (MOVDconst [c|d])
(ORconst  [c] (ORconst [d] x))  => (ORconst [c|d] x)
(XORconst [c] (MOVDconst [d]))  => (MOVDconst [c^d])
(XORconst [c] (XORconst [d] x)) => (XORconst [c^d] x)
(MVN (MOVDconst [c])) => (MOVDconst [^c])
(NEG (MOVDconst [c])) => (MOVDconst [-c])
// extensions of constants fold to the extended constant
(MOVBreg  (MOVDconst [c])) => (MOVDconst [int64(int8(c))])
(MOVBUreg (MOVDconst [c])) => (MOVDconst [int64(uint8(c))])
(MOVHreg  (MOVDconst [c])) => (MOVDconst [int64(int16(c))])
(MOVHUreg (MOVDconst [c])) => (MOVDconst [int64(uint16(c))])
(MOVWreg  (MOVDconst [c])) => (MOVDconst [int64(int32(c))])
(MOVWUreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))])
(MOVDreg  (MOVDconst [c])) => (MOVDconst [c])
  1440  
// constant comparisons: both operands known, so the NZCV flags are computed
// at compile time and materialized as a FlagConstant.
(CMPconst  (MOVDconst [x]) [y]) => (FlagConstant [subFlags64(x,y)])
(CMPWconst (MOVDconst [x]) [y]) => (FlagConstant [subFlags32(int32(x),y)])
(TSTconst  (MOVDconst [x]) [y]) => (FlagConstant [logicFlags64(x&y)])
(TSTWconst (MOVDconst [x]) [y]) => (FlagConstant [logicFlags32(int32(x)&y)])
(CMNconst  (MOVDconst [x]) [y]) => (FlagConstant [addFlags64(x,y)])
(CMNWconst (MOVDconst [x]) [y]) => (FlagConstant [addFlags32(int32(x),y)])

// other known comparisons: the left operand's value range proves it is
// strictly less than the constant, so the flags are those of comparing 0
// with 1 (subFlags64(0,1) = "less than, not equal").
(CMPconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (MOVWUreg _) [c]) && 0xffffffff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags64(0,1)])
(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) => (FlagConstant [subFlags64(0,1)])
(CMPWconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
(CMPWconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])

// absorb flag constants into branches: once the flags are a compile-time
// constant, the conditional branch becomes an unconditional First, with the
// successor order flipped when the condition is false.
(EQ (FlagConstant [fc]) yes no) &&  fc.eq() => (First yes no)
(EQ (FlagConstant [fc]) yes no) && !fc.eq() => (First no yes)

(NE (FlagConstant [fc]) yes no) &&  fc.ne() => (First yes no)
(NE (FlagConstant [fc]) yes no) && !fc.ne() => (First no yes)

(LT (FlagConstant [fc]) yes no) &&  fc.lt() => (First yes no)
(LT (FlagConstant [fc]) yes no) && !fc.lt() => (First no yes)

(LE (FlagConstant [fc]) yes no) &&  fc.le() => (First yes no)
(LE (FlagConstant [fc]) yes no) && !fc.le() => (First no yes)

(GT (FlagConstant [fc]) yes no) &&  fc.gt() => (First yes no)
(GT (FlagConstant [fc]) yes no) && !fc.gt() => (First no yes)

(GE (FlagConstant [fc]) yes no) &&  fc.ge() => (First yes no)
(GE (FlagConstant [fc]) yes no) && !fc.ge() => (First no yes)

(ULT (FlagConstant [fc]) yes no) &&  fc.ult() => (First yes no)
(ULT (FlagConstant [fc]) yes no) && !fc.ult() => (First no yes)

(ULE (FlagConstant [fc]) yes no) &&  fc.ule() => (First yes no)
(ULE (FlagConstant [fc]) yes no) && !fc.ule() => (First no yes)

(UGT (FlagConstant [fc]) yes no) &&  fc.ugt() => (First yes no)
(UGT (FlagConstant [fc]) yes no) && !fc.ugt() => (First no yes)

(UGE (FlagConstant [fc]) yes no) &&  fc.uge() => (First yes no)
(UGE (FlagConstant [fc]) yes no) && !fc.uge() => (First no yes)

// *noov variants are the "no overflow possible" forms used after CMN/CMP
// that the flag-constant evaluator handles specially.
(LTnoov (FlagConstant [fc]) yes no) &&  fc.ltNoov() => (First yes no)
(LTnoov (FlagConstant [fc]) yes no) && !fc.ltNoov() => (First no yes)

(LEnoov (FlagConstant [fc]) yes no) &&  fc.leNoov() => (First yes no)
(LEnoov (FlagConstant [fc]) yes no) && !fc.leNoov() => (First no yes)

(GTnoov (FlagConstant [fc]) yes no) &&  fc.gtNoov() => (First yes no)
(GTnoov (FlagConstant [fc]) yes no) && !fc.gtNoov() => (First no yes)

(GEnoov (FlagConstant [fc]) yes no) &&  fc.geNoov() => (First yes no)
(GEnoov (FlagConstant [fc]) yes no) && !fc.geNoov() => (First no yes)

// Z/NZ branch on a 64-bit register being zero/non-zero; ZW/NZW test only
// the low 32 bits.
(Z (MOVDconst [0]) yes no) => (First yes no)
(Z (MOVDconst [c]) yes no) && c != 0 => (First no yes)
(NZ (MOVDconst [0]) yes no) => (First no yes)
(NZ (MOVDconst [c]) yes no) && c != 0 => (First yes no)
(ZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First yes no)
(ZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First no yes)
(NZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First no yes)
(NZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First yes no)
  1509  
// absorb InvertFlags into branches: InvertFlags marks flags computed from a
// comparison with swapped operands, so each condition is replaced by its
// operand-swapped dual. Equality/inequality are symmetric and unchanged.
(LT (InvertFlags cmp) yes no) => (GT cmp yes no)
(GT (InvertFlags cmp) yes no) => (LT cmp yes no)
(LE (InvertFlags cmp) yes no) => (GE cmp yes no)
(GE (InvertFlags cmp) yes no) => (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) => (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) => (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) => (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) => (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) => (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) => (NE cmp yes no)
(FLT (InvertFlags cmp) yes no) => (FGT cmp yes no)
(FGT (InvertFlags cmp) yes no) => (FLT cmp yes no)
(FLE (InvertFlags cmp) yes no) => (FGE cmp yes no)
(FGE (InvertFlags cmp) yes no) => (FLE cmp yes no)
(LTnoov (InvertFlags cmp) yes no) => (GTnoov cmp yes no)
(GEnoov (InvertFlags cmp) yes no) => (LEnoov cmp yes no)
(LEnoov (InvertFlags cmp) yes no) => (GEnoov cmp yes no)
(GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)

// absorb InvertFlags into conditional instructions by inverting the stored
// condition code instead (arm64Invert swaps the operand order of cc).
(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp)
(CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp)
(CSINC [cc] x y (InvertFlags cmp)) => (CSINC [arm64Invert(cc)] x y cmp)
(CSINV [cc] x y (InvertFlags cmp)) => (CSINV [arm64Invert(cc)] x y cmp)
(CSNEG [cc] x y (InvertFlags cmp)) => (CSNEG [arm64Invert(cc)] x y cmp)
  1537  
// absorb flag constants into boolean values: when the flags are a
// compile-time constant, the boolean materialization collapses to 0 or 1
// (b2i converts the Go bool to an int64 auxint).
(Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])
(NotEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ne())])
(LessThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.lt())])
(LessThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ult())])
(LessEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.le())])
(LessEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ule())])
(GreaterThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.gt())])
(GreaterThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ugt())])
(GreaterEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ge())])
(GreaterEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.uge())])

// absorb InvertFlags into boolean values: swapped-operand flags flip each
// ordering predicate to its dual; equality predicates are unchanged.
(Equal (InvertFlags x)) => (Equal x)
(NotEqual (InvertFlags x)) => (NotEqual x)
(LessThan (InvertFlags x)) => (GreaterThan x)
(LessThanU (InvertFlags x)) => (GreaterThanU x)
(GreaterThan (InvertFlags x)) => (LessThan x)
(GreaterThanU (InvertFlags x)) => (LessThanU x)
(LessEqual (InvertFlags x)) => (GreaterEqual x)
(LessEqualU (InvertFlags x)) => (GreaterEqualU x)
(GreaterEqual (InvertFlags x)) => (LessEqual x)
(GreaterEqualU (InvertFlags x)) => (LessEqualU x)
(LessThanF (InvertFlags x)) => (GreaterThanF x)
(LessEqualF (InvertFlags x)) => (GreaterEqualF x)
(GreaterThanF (InvertFlags x)) => (LessThanF x)
(GreaterEqualF (InvertFlags x)) => (LessEqualF x)

// Boolean-generating instructions always
// zero upper bit of the register; no need to zero-extend
(MOVBUreg x) && x.Type.IsBoolean() => (MOVDreg x)

// absorb flag constants into conditional instructions: ccARM64Eval returns
// >0 when cc is known true for this flag value, <0 when known false, and 0
// when it cannot decide (in which case no rule fires).
(CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y
(CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x
(CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
(CSNEG [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSNEG [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (NEG y)
(CSINV [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSINV [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (Not y)
(CSINC [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSINC [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (ADDconst [1] y)
(CSETM [cc] flag) && ccARM64Eval(cc, flag) > 0 => (MOVDconst [-1])
(CSETM [cc] flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])

// absorb flags back into boolean CSEL: a CSEL testing "boolval != 0" (or
// "== 0") can use boolval's own flag-producing comparison directly, with
// boolval's op (or its negation) as the condition code.
(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
      (CSEL [boolval.Op] x y flagArg(boolval))
(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
      (CSEL [arm64Negate(boolval.Op)] x y flagArg(boolval))
(CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
      (CSEL0 [boolval.Op] x flagArg(boolval))
(CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
      (CSEL0 [arm64Negate(boolval.Op)] x flagArg(boolval))
  1593  
// absorb shifts into ops: ARM64 data-processing instructions accept a
// shifted second operand, so a constant-shift value with a single use
// (clobberIfDead) is folded into the consuming op as a *shift form.
(NEG x:(SLLconst [c] y)) && clobberIfDead(x) => (NEGshiftLL [c] y)
(NEG x:(SRLconst [c] y)) && clobberIfDead(x) => (NEGshiftRL [c] y)
(NEG x:(SRAconst [c] y)) && clobberIfDead(x) => (NEGshiftRA [c] y)
(MVN x:(SLLconst [c] y)) && clobberIfDead(x) => (MVNshiftLL [c] y)
(MVN x:(SRLconst [c] y)) && clobberIfDead(x) => (MVNshiftRL [c] y)
(MVN x:(SRAconst [c] y)) && clobberIfDead(x) => (MVNshiftRA [c] y)
(MVN x:(RORconst [c] y)) && clobberIfDead(x) => (MVNshiftRO [c] y)
(ADD x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ADDshiftLL x0 y [c])
(ADD x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ADDshiftRL x0 y [c])
(ADD x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ADDshiftRA x0 y [c])
(SUB x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (SUBshiftLL x0 y [c])
(SUB x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (SUBshiftRL x0 y [c])
(SUB x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (SUBshiftRA x0 y [c])
// Rotate (RO) forms exist only for the logical ops that support them.
(AND x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ANDshiftLL x0 y [c])
(AND x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ANDshiftRL x0 y [c])
(AND x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ANDshiftRA x0 y [c])
(AND x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ANDshiftRO x0 y [c])
(OR  x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ORshiftLL  x0 y [c]) // useful for combined load
(OR  x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ORshiftRL  x0 y [c])
(OR  x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ORshiftRA  x0 y [c])
(OR  x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ORshiftRO  x0 y [c])
(XOR x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (XORshiftLL x0 y [c])
(XOR x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (XORshiftRL x0 y [c])
(XOR x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (XORshiftRA x0 y [c])
(XOR x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (XORshiftRO x0 y [c])
(BIC x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (BICshiftLL x0 y [c])
(BIC x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (BICshiftRL x0 y [c])
(BIC x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (BICshiftRA x0 y [c])
(BIC x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (BICshiftRO x0 y [c])
(ORN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ORNshiftLL x0 y [c])
(ORN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ORNshiftRL x0 y [c])
(ORN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ORNshiftRA x0 y [c])
(ORN x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ORNshiftRO x0 y [c])
(EON x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (EONshiftLL x0 y [c])
(EON x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (EONshiftRL x0 y [c])
(EON x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (EONshiftRA x0 y [c])
(EON x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (EONshiftRO x0 y [c])
// CMP is not commutative: when the shifted operand is on the left, compare
// the other way around and mark the flags with InvertFlags.
(CMP x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (CMPshiftLL x0 y [c])
(CMP x0:(SLLconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftLL x1 y [c]))
(CMP x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (CMPshiftRL x0 y [c])
(CMP x0:(SRLconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftRL x1 y [c]))
(CMP x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (CMPshiftRA x0 y [c])
(CMP x0:(SRAconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftRA x1 y [c]))
(CMN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (CMNshiftLL x0 y [c])
(CMN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (CMNshiftRL x0 y [c])
(CMN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (CMNshiftRA x0 y [c])
(TST x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (TSTshiftLL x0 y [c])
(TST x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (TSTshiftRL x0 y [c])
(TST x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (TSTshiftRA x0 y [c])
(TST x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (TSTshiftRO x0 y [c])

// prefer *const ops to *shift ops: when the non-shifted operand is itself a
// constant, a *const op exposes more folding opportunities.
(ADDshiftLL (MOVDconst [c]) x [d]) => (ADDconst [c] (SLLconst <x.Type> x [d]))
(ADDshiftRL (MOVDconst [c]) x [d]) => (ADDconst [c] (SRLconst <x.Type> x [d]))
(ADDshiftRA (MOVDconst [c]) x [d]) => (ADDconst [c] (SRAconst <x.Type> x [d]))
(ANDshiftLL (MOVDconst [c]) x [d]) => (ANDconst [c] (SLLconst <x.Type> x [d]))
(ANDshiftRL (MOVDconst [c]) x [d]) => (ANDconst [c] (SRLconst <x.Type> x [d]))
(ANDshiftRA (MOVDconst [c]) x [d]) => (ANDconst [c] (SRAconst <x.Type> x [d]))
(ANDshiftRO (MOVDconst [c]) x [d]) => (ANDconst [c] (RORconst <x.Type> x [d]))
(ORshiftLL  (MOVDconst [c]) x [d]) => (ORconst  [c] (SLLconst <x.Type> x [d]))
(ORshiftRL  (MOVDconst [c]) x [d]) => (ORconst  [c] (SRLconst <x.Type> x [d]))
(ORshiftRA  (MOVDconst [c]) x [d]) => (ORconst  [c] (SRAconst <x.Type> x [d]))
(ORshiftRO  (MOVDconst [c]) x [d]) => (ORconst  [c] (RORconst <x.Type> x [d]))
(XORshiftLL (MOVDconst [c]) x [d]) => (XORconst [c] (SLLconst <x.Type> x [d]))
(XORshiftRL (MOVDconst [c]) x [d]) => (XORconst [c] (SRLconst <x.Type> x [d]))
(XORshiftRA (MOVDconst [c]) x [d]) => (XORconst [c] (SRAconst <x.Type> x [d]))
(XORshiftRO (MOVDconst [c]) x [d]) => (XORconst [c] (RORconst <x.Type> x [d]))
(CMPshiftLL (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
(CMPshiftRL (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
(CMPshiftRA (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
(CMNshiftLL (MOVDconst [c]) x [d]) => (CMNconst [c] (SLLconst <x.Type> x [d]))
(CMNshiftRL (MOVDconst [c]) x [d]) => (CMNconst [c] (SRLconst <x.Type> x [d]))
(CMNshiftRA (MOVDconst [c]) x [d]) => (CMNconst [c] (SRAconst <x.Type> x [d]))
(TSTshiftLL (MOVDconst [c]) x [d]) => (TSTconst [c] (SLLconst <x.Type> x [d]))
(TSTshiftRL (MOVDconst [c]) x [d]) => (TSTconst [c] (SRLconst <x.Type> x [d]))
(TSTshiftRA (MOVDconst [c]) x [d]) => (TSTconst [c] (SRAconst <x.Type> x [d]))
(TSTshiftRO (MOVDconst [c]) x [d]) => (TSTconst [c] (RORconst <x.Type> x [d]))
  1672  
// constant folding in *shift ops: when the shifted operand is a constant,
// apply the shift/rotate at compile time and use the plain *const op.
(MVNshiftLL (MOVDconst [c]) [d]) => (MOVDconst [^int64(uint64(c)<<uint64(d))])
(MVNshiftRL (MOVDconst [c]) [d]) => (MOVDconst [^int64(uint64(c)>>uint64(d))])
(MVNshiftRA (MOVDconst [c]) [d]) => (MOVDconst [^(c>>uint64(d))])
(MVNshiftRO (MOVDconst [c]) [d]) => (MOVDconst [^rotateRight64(c, d)])
(NEGshiftLL (MOVDconst [c]) [d]) => (MOVDconst [-int64(uint64(c)<<uint64(d))])
(NEGshiftRL (MOVDconst [c]) [d]) => (MOVDconst [-int64(uint64(c)>>uint64(d))])
(NEGshiftRA (MOVDconst [c]) [d]) => (MOVDconst [-(c>>uint64(d))])
// RL folds use an unsigned (logical) shift; RA folds use Go's signed shift.
(ADDshiftLL x (MOVDconst [c]) [d]) => (ADDconst x [int64(uint64(c)<<uint64(d))])
(ADDshiftRL x (MOVDconst [c]) [d]) => (ADDconst x [int64(uint64(c)>>uint64(d))])
(ADDshiftRA x (MOVDconst [c]) [d]) => (ADDconst x [c>>uint64(d)])
(SUBshiftLL x (MOVDconst [c]) [d]) => (SUBconst x [int64(uint64(c)<<uint64(d))])
(SUBshiftRL x (MOVDconst [c]) [d]) => (SUBconst x [int64(uint64(c)>>uint64(d))])
(SUBshiftRA x (MOVDconst [c]) [d]) => (SUBconst x [c>>uint64(d)])
(ANDshiftLL x (MOVDconst [c]) [d]) => (ANDconst x [int64(uint64(c)<<uint64(d))])
(ANDshiftRL x (MOVDconst [c]) [d]) => (ANDconst x [int64(uint64(c)>>uint64(d))])
(ANDshiftRA x (MOVDconst [c]) [d]) => (ANDconst x [c>>uint64(d)])
(ANDshiftRO x (MOVDconst [c]) [d]) => (ANDconst x [rotateRight64(c, d)])
(ORshiftLL  x (MOVDconst [c]) [d]) => (ORconst  x [int64(uint64(c)<<uint64(d))])
(ORshiftRL  x (MOVDconst [c]) [d]) => (ORconst  x [int64(uint64(c)>>uint64(d))])
(ORshiftRA  x (MOVDconst [c]) [d]) => (ORconst  x [c>>uint64(d)])
(ORshiftRO  x (MOVDconst [c]) [d]) => (ORconst  x [rotateRight64(c, d)])
(XORshiftLL x (MOVDconst [c]) [d]) => (XORconst x [int64(uint64(c)<<uint64(d))])
(XORshiftRL x (MOVDconst [c]) [d]) => (XORconst x [int64(uint64(c)>>uint64(d))])
(XORshiftRA x (MOVDconst [c]) [d]) => (XORconst x [c>>uint64(d)])
(XORshiftRO x (MOVDconst [c]) [d]) => (XORconst x [rotateRight64(c, d)])
// BIC/ORN/EON are AND/OR/XOR with the second operand complemented, so the
// folded constant is complemented (^) here.
(BICshiftLL x (MOVDconst [c]) [d]) => (ANDconst x [^int64(uint64(c)<<uint64(d))])
(BICshiftRL x (MOVDconst [c]) [d]) => (ANDconst x [^int64(uint64(c)>>uint64(d))])
(BICshiftRA x (MOVDconst [c]) [d]) => (ANDconst x [^(c>>uint64(d))])
(BICshiftRO x (MOVDconst [c]) [d]) => (ANDconst x [^rotateRight64(c, d)])
(ORNshiftLL x (MOVDconst [c]) [d]) => (ORconst  x [^int64(uint64(c)<<uint64(d))])
(ORNshiftRL x (MOVDconst [c]) [d]) => (ORconst  x [^int64(uint64(c)>>uint64(d))])
(ORNshiftRA x (MOVDconst [c]) [d]) => (ORconst  x [^(c>>uint64(d))])
(ORNshiftRO x (MOVDconst [c]) [d]) => (ORconst  x [^rotateRight64(c, d)])
(EONshiftLL x (MOVDconst [c]) [d]) => (XORconst x [^int64(uint64(c)<<uint64(d))])
(EONshiftRL x (MOVDconst [c]) [d]) => (XORconst x [^int64(uint64(c)>>uint64(d))])
(EONshiftRA x (MOVDconst [c]) [d]) => (XORconst x [^(c>>uint64(d))])
(EONshiftRO x (MOVDconst [c]) [d]) => (XORconst x [^rotateRight64(c, d)])
(CMPshiftLL x (MOVDconst [c]) [d]) => (CMPconst x [int64(uint64(c)<<uint64(d))])
(CMPshiftRL x (MOVDconst [c]) [d]) => (CMPconst x [int64(uint64(c)>>uint64(d))])
(CMPshiftRA x (MOVDconst [c]) [d]) => (CMPconst x [c>>uint64(d)])
(CMNshiftLL x (MOVDconst [c]) [d]) => (CMNconst x [int64(uint64(c)<<uint64(d))])
(CMNshiftRL x (MOVDconst [c]) [d]) => (CMNconst x [int64(uint64(c)>>uint64(d))])
(CMNshiftRA x (MOVDconst [c]) [d]) => (CMNconst x [c>>uint64(d)])
(TSTshiftLL x (MOVDconst [c]) [d]) => (TSTconst x [int64(uint64(c)<<uint64(d))])
(TSTshiftRL x (MOVDconst [c]) [d]) => (TSTconst x [int64(uint64(c)>>uint64(d))])
(TSTshiftRA x (MOVDconst [c]) [d]) => (TSTconst x [c>>uint64(d)])
(TSTshiftRO x (MOVDconst [c]) [d]) => (TSTconst x [rotateRight64(c, d)])

// simplification with *shift ops: both operands are the same value shifted
// by the same amount, so op(v, v) collapses to its identity/annihilator
// (x-x=0, x&x=x, x|x=x, x^x=0, x&^x=0, x^^x=-1, x|^x=-1).
(SUBshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
(SUBshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
(SUBshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
(ANDshiftLL y:(SLLconst x [c]) x [c]) => y
(ANDshiftRL y:(SRLconst x [c]) x [c]) => y
(ANDshiftRA y:(SRAconst x [c]) x [c]) => y
(ANDshiftRO y:(RORconst x [c]) x [c]) => y
(ORshiftLL  y:(SLLconst x [c]) x [c]) => y
(ORshiftRL  y:(SRLconst x [c]) x [c]) => y
(ORshiftRA  y:(SRAconst x [c]) x [c]) => y
(ORshiftRO  y:(RORconst x [c]) x [c]) => y
(XORshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
(XORshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
(XORshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
(XORshiftRO (RORconst x [c]) x [c]) => (MOVDconst [0])
(BICshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
(BICshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
(BICshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
(BICshiftRO (RORconst x [c]) x [c]) => (MOVDconst [0])
(EONshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [-1])
(EONshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [-1])
(EONshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [-1])
(EONshiftRO (RORconst x [c]) x [c]) => (MOVDconst [-1])
(ORNshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [-1])
(ORNshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [-1])
(ORNshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [-1])
(ORNshiftRO (RORconst x [c]) x [c]) => (MOVDconst [-1])
  1750  
// Generate rotates with const shift: (x>>(64-c)) op (x<<c) is a rotate by
// 64-c (and the RL forms a rotate by c) when "op" merges disjoint bits.
(ADDshiftLL [c] (SRLconst x [64-c]) x) => (RORconst [64-c] x)
( ORshiftLL [c] (SRLconst x [64-c]) x) => (RORconst [64-c] x)
(XORshiftLL [c] (SRLconst x [64-c]) x) => (RORconst [64-c] x)
(ADDshiftRL [c] (SLLconst x [64-c]) x) => (RORconst [   c] x)
( ORshiftRL [c] (SLLconst x [64-c]) x) => (RORconst [   c] x)
(XORshiftRL [c] (SLLconst x [64-c]) x) => (RORconst [   c] x)

// 32-bit rotates: the UBFX extracts the top c bits of the low word, which
// is the 32-bit logical right shift by 32-c needed to form a RORW.
(ADDshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (RORWconst [32-c] x)
( ORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (RORWconst [32-c] x)
(XORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (RORWconst [32-c] x)
(ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 => (RORWconst [c] x)
( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 => (RORWconst [c] x)
(XORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 => (RORWconst [c] x)

// Rotates compose by adding the amounts modulo the register width.
(RORconst [c] (RORconst [d] x)) => (RORconst [(c+d)&63] x)
(RORWconst [c] (RORWconst [d] x)) => (RORWconst [(c+d)&31] x)

// Generate rotates with non-const shift.
// These rules match the Go source code like
//	y &= 63
//	x << y | x >> (64-y)
// "|" can also be "^" or "+".
// arm64 does not have a ROL instruction, so ROL(x, y) is replaced by ROR(x, -y).
((ADD|OR|XOR) (SLL x (ANDconst <t> [63] y))
	(CSEL0 <typ.UInt64> [cc] (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
		(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc == OpARM64LessThanU
	=> (ROR x (NEG <t> y))
((ADD|OR|XOR) (SRL <typ.UInt64> x (ANDconst <t> [63] y))
	(CSEL0 <typ.UInt64> [cc] (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
		(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc == OpARM64LessThanU
	=> (ROR x y)

// These rules match the Go source code like
//	y &= 31
//	x << y | x >> (32-y)
// "|" can also be "^" or "+".
// arm64 does not have a ROLW instruction, so ROLW(x, y) is replaced by RORW(x, -y).
((ADD|OR|XOR) (SLL x (ANDconst <t> [31] y))
	(CSEL0 <typ.UInt32> [cc] (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
		(CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc == OpARM64LessThanU
	=> (RORW x (NEG <t> y))
((ADD|OR|XOR) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y))
	(CSEL0 <typ.UInt32> [cc] (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
		(CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc == OpARM64LessThanU
	=> (RORW x y)

// rev16w | rev16
// ((x>>8) | (x<<8)) => (REV16W x), the type of x is uint16, "|" can also be "^" or "+".
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x) => (REV16W x)

// ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+".
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
	&& uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
	=> (REV16W x)

// ((x & 0xff00ff00ff00ff00)>>8) | ((x & 0x00ff00ff00ff00ff)<<8), "|" can also be "^" or "+".
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
	&& (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
	=> (REV16 x)

// ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+".
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
	&& (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
	=> (REV16 (ANDconst <x.Type> [0xffffffff] x))

// Extract from reg pair: distinct sources x2 and x combine via EXTR
// (a double-register extract) rather than a rotate.
(ADDshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)
( ORshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)
(XORshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)

(ADDshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (EXTRWconst [32-c] x2 x)
( ORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (EXTRWconst [32-c] x2 x)
(XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (EXTRWconst [32-c] x2 x)
  1831  
// Rewrite special pairs of shifts to AND.
// On ARM64 the bitmask can fit into an instruction.
(SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 => (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
(SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 => (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits

// Special case setting bit as 1. An example is math.Copysign(c,-1)
(ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0  => (ORconst [c1] x)

// If the shift amount is larger than the datasize(32, 16, 8), we can optimize to constant 0.
(MOVWUreg (SLLconst [lc] x)) && lc >= 32 => (MOVDconst [0])
(MOVHUreg (SLLconst [lc] x)) && lc >= 16 => (MOVDconst [0])
(MOVBUreg (SLLconst [lc] x)) && lc >= 8 => (MOVDconst [0])

// After zero extension, the upper (64-datasize(32|16|8)) bits are zero, we can optimize to constant 0.
(SRLconst [rc] (MOVWUreg x)) && rc >= 32 => (MOVDconst [0])
(SRLconst [rc] (MOVHUreg x)) && rc >= 16 => (MOVDconst [0])
(SRLconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVDconst [0])
  1849  
  1850  // bitfield ops
  1851  
  1852  // sbfiz
  1853  // (x << lc) >> rc
  1854  (SRAconst [rc] (SLLconst [lc] x)) && lc > rc => (SBFIZ [armBFAuxInt(lc-rc, 64-lc)] x)
  1855  // int64(x << lc)
  1856  (MOVWreg (SLLconst [lc] x)) && lc < 32 => (SBFIZ [armBFAuxInt(lc, 32-lc)] x)
  1857  (MOVHreg (SLLconst [lc] x)) && lc < 16 => (SBFIZ [armBFAuxInt(lc, 16-lc)] x)
  1858  (MOVBreg (SLLconst [lc] x)) && lc < 8 => (SBFIZ [armBFAuxInt(lc, 8-lc)] x)
  1859  // int64(x) << lc
  1860  (SLLconst [lc] (MOVWreg x))  => (SBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x)
  1861  (SLLconst [lc] (MOVHreg x))  => (SBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x)
  1862  (SLLconst [lc] (MOVBreg x))  => (SBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x)
  1863  
  1864  // sbfx
  1865  // (x << lc) >> rc
  1866  (SRAconst [rc] (SLLconst [lc] x)) && lc <= rc => (SBFX [armBFAuxInt(rc-lc, 64-rc)] x)
  1867  // int64(x) >> rc
  1868  (SRAconst [rc] (MOVWreg x)) && rc < 32 => (SBFX [armBFAuxInt(rc, 32-rc)] x)
  1869  (SRAconst [rc] (MOVHreg x)) && rc < 16 => (SBFX [armBFAuxInt(rc, 16-rc)] x)
  1870  (SRAconst [rc] (MOVBreg x)) && rc < 8 => (SBFX [armBFAuxInt(rc, 8-rc)] x)
  1871  // merge sbfx and sign-extension into sbfx
  1872  (MOVWreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (SBFX [bfc] x)
  1873  (MOVHreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (SBFX [bfc] x)
  1874  (MOVBreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <=  8 => (SBFX [bfc] x)
  1875  
  1876  // sbfiz/sbfx combinations: merge shifts into bitfield ops
  1877  (SRAconst [sc] (SBFIZ [bfc] x)) && sc < bfc.getARM64BFlsb()
  1878  	=> (SBFIZ [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x)
  1879  (SRAconst [sc] (SBFIZ [bfc] x)) && sc >= bfc.getARM64BFlsb()
  1880  	&& sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth()
  1881  	=> (SBFX [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x)
  1882  
  1883  // ubfiz
  1884  // (x << lc) >> rc
  1885  (SRLconst [rc] (SLLconst [lc] x)) && lc > rc => (UBFIZ [armBFAuxInt(lc-rc, 64-lc)] x)
  1886  // uint64(x) << lc
  1887  (SLLconst [lc] (MOVWUreg x))  => (UBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x)
  1888  (SLLconst [lc] (MOVHUreg x))  => (UBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x)
  1889  (SLLconst [lc] (MOVBUreg x))  => (UBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x)
  1890  // uint64(x << lc)
  1891  (MOVWUreg (SLLconst [lc] x)) && lc < 32 => (UBFIZ [armBFAuxInt(lc, 32-lc)] x)
  1892  (MOVHUreg (SLLconst [lc] x)) && lc < 16 => (UBFIZ [armBFAuxInt(lc, 16-lc)] x)
  1893  (MOVBUreg (SLLconst [lc] x)) && lc < 8 => (UBFIZ [armBFAuxInt(lc, 8-lc)] x)
  1894  
  1895  // merge ANDconst into ubfiz
  1896  // (x & ac) << sc
  1897  (SLLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, 0)
  1898  	=> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
  1899  // (x << sc) & ac
  1900  (ANDconst [ac] (SLLconst [sc] x)) && isARM64BFMask(sc, ac, sc)
  1901  	=> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x)
  1902  
// ubfx
// Fold shift/zero-extension/mask combinations into UBFX
// (unsigned bitfield extract: field starting at a given lsb moved to bit 0,
// zero-extended).
// (x << lc) >> rc
(SRLconst [rc] (SLLconst [lc] x)) && lc < rc => (UBFX [armBFAuxInt(rc-lc, 64-rc)] x)
// uint64(x) >> rc
(SRLconst [rc] (MOVWUreg x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32-rc)] x)
(SRLconst [rc] (MOVHUreg x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16-rc)] x)
(SRLconst [rc] (MOVBUreg x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8-rc)] x)
// uint64(x >> rc)
// Width stays at the extension width here: the shift happened before the
// zero-extension, so the extracted field keeps its full narrow width.
(MOVWUreg (SRLconst [rc] x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32)] x)
(MOVHUreg (SRLconst [rc] x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16)] x)
(MOVBUreg (SRLconst [rc] x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8)] x)
// merge ANDconst into ubfx
// (x >> sc) & ac
(ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0)
	=> (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
// (x & ac) >> sc
(SRLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, sc)
	=> (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x)

// merge ubfx and zero-extension into ubfx
// The UBFX result is already zero-extended, so a following MOV(W|H|B)Ureg is
// redundant when the field fits in the extension width.
(MOVWUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (UBFX [bfc] x)
(MOVHUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (UBFX [bfc] x)
(MOVBUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <=  8 => (UBFX [bfc] x)

// ubfiz/ubfx combinations: merge shifts into bitfield ops
// Each rule adjusts the (lsb, width) pair of an existing UBFX/UBFIZ to absorb
// an adjacent constant shift, keeping a single bitfield instruction.
(SRLconst [sc] (UBFX [bfc] x)) && sc < bfc.getARM64BFwidth()
	=> (UBFX [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth()-sc)] x)
(UBFX [bfc] (SRLconst [sc] x)) && sc+bfc.getARM64BFwidth()+bfc.getARM64BFlsb() < 64
	=> (UBFX [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth())] x)
(SLLconst [sc] (UBFIZ [bfc] x)) && sc+bfc.getARM64BFwidth()+bfc.getARM64BFlsb() < 64
	=> (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth())] x)
(UBFIZ [bfc] (SLLconst [sc] x)) && sc < bfc.getARM64BFwidth()
	=> (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth()-sc)] x)
// ((x << c1) >> c2) >> c3
// Three cases by where the right shift lands relative to the inserted field:
// exactly at its lsb (plain mask), below it (narrower insert), inside it
// (becomes an extract).
(SRLconst [sc] (UBFIZ [bfc] x)) && sc == bfc.getARM64BFlsb()
	=> (ANDconst [1<<uint(bfc.getARM64BFwidth())-1] x)
(SRLconst [sc] (UBFIZ [bfc] x)) && sc < bfc.getARM64BFlsb()
	=> (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x)
(SRLconst [sc] (UBFIZ [bfc] x)) && sc > bfc.getARM64BFlsb()
	&& sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth()
	=> (UBFX [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x)
// ((x << c1) << c2) >> c3
// Mirror image of the above with the roles of UBFX/UBFIZ swapped.
(UBFX [bfc] (SLLconst [sc] x)) && sc == bfc.getARM64BFlsb()
	=> (ANDconst [1<<uint(bfc.getARM64BFwidth())-1] x)
(UBFX [bfc] (SLLconst [sc] x)) && sc < bfc.getARM64BFlsb()
	=> (UBFX [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x)
(UBFX [bfc] (SLLconst [sc] x)) && sc > bfc.getARM64BFlsb()
	&& sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth()
	=> (UBFIZ [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x)

// bfi
// OR of a bitfield-insert-in-zero with the complementary masked value is a
// bitfield insert into y: the ac == ^(mask << lsb) condition guarantees the
// two operands cover disjoint bits.
(OR (UBFIZ [bfc] x) (ANDconst [ac] y))
	&& ac == ^((1<<uint(bfc.getARM64BFwidth())-1) << uint(bfc.getARM64BFlsb()))
	=> (BFI [bfc] y x)
(ORshiftRL [rc] (ANDconst [ac] x) (SLLconst [lc] y))
	&& lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc))
	=> (BFI [armBFAuxInt(lc-rc, 64-lc)] x y)
// bfxil
// Same idea with the inserted field at bit 0 (bitfield extract and insert
// at the low end).
(OR (UBFX [bfc] x) (ANDconst [ac] y)) && ac == ^(1<<uint(bfc.getARM64BFwidth())-1)
	=> (BFXIL [bfc] y x)
(ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == bfc.getARM64BFwidth()
	=> (BFXIL [bfc] y x)
(ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) && lc < rc && ac == ^((1<<uint(64-rc)-1))
	=> (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x)

// do combined loads
// Conditions common to all load-merging rules below:
//   *.Uses == 1        - the partial loads/extensions feed only this pattern,
//                        so they can be safely replaced;
//   mergePoint != nil  - there is a block where all the small loads are
//                        available to place the wide load;
//   clobber(...)       - marks the replaced values dead;
//   s == nil           - the offset-form address carries no symbol, so it can
//                        be rewritten into an indexed address.
// little endian loads
// b[0] | b[1]<<8 => load 16-bit
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))
	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
	&& i1 == i0+1
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
// Indexed variant: the two addressing forms may name ptr/idx in either order,
// hence the symmetric isSamePtr disjunction.
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))
	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 => load 32-bit
// These match a 16-bit load already merged by the rules above ORed with two
// more adjacent bytes, and widen the whole thing to one 32-bit load.
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	            x0:(MOVHUload [i0] {s} p mem)
	y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
	&& i2 == i0+2
	&& i3 == i0+3
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& clobber(x0, x1, x2, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	            x0:(MOVHUloadidx ptr0 idx0 mem)
	y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	            x0:(MOVHUloadidx ptr idx mem)
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& clobber(x0, x1, x2, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
// Scaled-index variant: MOVHUloadidx2 addresses ptr0+idx0*2, so the result
// uses an explicit SLLconst [1] to recover the byte index.
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	            x0:(MOVHUloadidx2 ptr0 idx0 mem)
	y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 => load 64-bit
// A previously merged 32-bit load ORed with four more adjacent bytes becomes
// one 64-bit load.
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	            x0:(MOVWUload [i0] {s} p mem)
	y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
	&& i4 == i0+4
	&& i5 == i0+5
	&& i6 == i0+6
	&& i7 == i0+7
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	            x0:(MOVWUloadidx ptr0 idx0 mem)
	y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
// Scaled-index variant: MOVWUloadidx4 addresses ptr0+idx0*4, hence the
// SLLconst [2] in the replacement.
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	            x0:(MOVWUloadidx4 ptr0 idx0 mem)
	y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	            x0:(MOVWUloadidx ptr idx mem)
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)

// b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] => load 32-bit
// Same merge as above but matching the tree shape where the highest byte sits
// in an explicit SLLconst and the lowest byte is the plain OR operand.
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
	&& i1 == i0+1
	&& i2 == i0+2
	&& i3 == i0+3
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUload [3] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [2] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)

// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] => load 64-bit
// Eight adjacent bytes combined little-endian through an OR/ORshiftLL chain
// collapse into a single 64-bit load.
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem)))
	y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem)))
	y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))
	y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
	&& i1 == i0+1
	&& i2 == i0+2
	&& i3 == i0+3
	&& i4 == i0+4
	&& i5 == i0+5
	&& i6 == i0+6
	&& i7 == i0+7
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUload [7] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [6] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [4] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [3] {s} p mem)))
	y5:(MOVDnop x5:(MOVBUload [2] {s} p mem)))
	y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)

// big endian loads
// The bytes are combined in big-endian order, so the merged (little-endian)
// wide load must be followed by a byte-reverse (REV16W/REVW/REV).
// b[1] | b[0]<<8 => load 16-bit, reverse
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem))
	y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
	&& i1 == i0+1
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))
	y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem))
	y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))

// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 => load 32-bit, reverse
// Builds on the 16-bit big-endian merge above: an already byte-reversed
// half-word plus two more bytes becomes a 32-bit load + REVW.
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	y0:(REV16W  x0:(MOVHUload [i2] {s} p mem))
	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
	&& i1 == i0+1
	&& i2 == i0+2
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& clobber(x0, x1, x2, y0, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	y0:(REV16W  x0:(MOVHUload [2] {s} p mem))
	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, y0, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	y0:(REV16W  x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& clobber(x0, x1, x2, y0, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))

// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 => load 64-bit, reverse
// An already byte-reversed 32-bit word plus four more bytes becomes a
// 64-bit load + REV.
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	y0:(REVW    x0:(MOVWUload [i4] {s} p mem))
	y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
	&& i1 == i0+1
	&& i2 == i0+2
	&& i3 == i0+3
	&& i4 == i0+4
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	y0:(REVW    x0:(MOVWUload [4] {s} p mem))
	y1:(MOVDnop x1:(MOVBUload [3] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	y0:(REVW    x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))

// b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] => load 32-bit, reverse
// OR-tree shape of the big-endian 32-bit merge: four bytes combined
// high-to-low become one 32-bit load + REVW.
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
	&& i1 == i0+1
	&& i2 == i0+2
	&& i3 == i0+3
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))

// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] => load 64-bit, reverse
// OR-tree shape of the big-endian 64-bit merge: eight bytes combined
// high-to-low become one 64-bit load + REV.
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem)))
	y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem)))
	y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem)))
	y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
	&& i1 == i0+1
	&& i2 == i0+2
	&& i3 == i0+3
	&& i4 == i0+4
	&& i5 == i0+5
	&& i6 == i0+6
	&& i7 == i0+7
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [4] {s} p mem)))
	y5:(MOVDnop x5:(MOVBUload [5] {s} p mem)))
	y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))
	y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
	y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
	y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
	y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))

// Combine zero stores into larger (unaligned) stores.
// Two adjacent zero stores of width w (areAdjacentOffsets(..., w)) to the
// same pointer merge into a single zero store of width 2w, anchored at the
// lower offset (min(i, j)).
(MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
	&& x.Uses == 1
	&& areAdjacentOffsets(int64(i),int64(j),1)
	&& isSamePtr(ptr0, ptr1)
	&& clobber(x)
	=> (MOVHstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
(MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
	&& x.Uses == 1
	&& s == nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& clobber(x)
	=> (MOVHstorezeroidx ptr1 idx1 mem)
(MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
	&& x.Uses == 1
	&& clobber(x)
	=> (MOVHstorezeroidx ptr idx mem)
(MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
	&& x.Uses == 1
	&& areAdjacentOffsets(int64(i),int64(j),2)
	&& isSamePtr(ptr0, ptr1)
	&& clobber(x)
	=> (MOVWstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
(MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
	&& x.Uses == 1
	&& s == nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& clobber(x)
	=> (MOVWstorezeroidx ptr1 idx1 mem)
(MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
	&& x.Uses == 1
	&& clobber(x)
	=> (MOVWstorezeroidx ptr idx mem)
// Scaled-index form: the shifted index is rebuilt with SLLconst so the
// result can use the unscaled indexed store.
(MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
	&& x.Uses == 1
	&& s == nil
	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
	&& clobber(x)
	=> (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
(MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
	&& x.Uses == 1
	&& areAdjacentOffsets(int64(i),int64(j),4)
	&& isSamePtr(ptr0, ptr1)
	&& clobber(x)
	=> (MOVDstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
(MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
	&& x.Uses == 1
	&& s == nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& clobber(x)
	=> (MOVDstorezeroidx ptr1 idx1 mem)
(MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
	&& x.Uses == 1
	&& clobber(x)
	=> (MOVDstorezeroidx ptr idx mem)
(MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
	&& x.Uses == 1
	&& s == nil
	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
	&& clobber(x)
	=> (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
// Two adjacent 8-byte zero stores merge into MOVQstorezero (presumably a
// 16-byte zeroing store pair -- confirm against the ARM64 op definitions).
(MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
	&& x.Uses == 1
	&& areAdjacentOffsets(int64(i),int64(j),8)
	&& isSamePtr(ptr0, ptr1)
	&& clobber(x)
	=> (MOVQstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
  2502  (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
  2503  	&& x.Uses == 1
  2504  	&& s == nil
  2505  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2506  	&& clobber(x)
  2507  	=> (MOVQstorezero [0] {s} p0 mem)
  2508  (MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
  2509  	&& x.Uses == 1
  2510  	&& s == nil
  2511  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2512  	&& clobber(x)
  2513  	=> (MOVQstorezero [0] {s} p0 mem)
  2514  
  2515  // Combine stores into larger (unaligned) stores.
// Two narrow stores of adjacent slices of the same value w merge into one
// store of twice the width: the low part of w at the lower offset (i-1,
// i-2, or i-4) and the part shifted right by the store width at the higher
// offset i -- a little-endian layout. Variants cover the different shapes
// the shifted part can take after other rewrites: SRLconst, UBFX (bitfield
// extract), and SRLconst of (MOVDreg w); plus offset, register-index, and
// scaled-index addressing. Rules matching w0:(SRLconst [j-8] w) etc. handle
// a running chain where both stored values are shifts of the same w.
  2516  (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2517  	&& x.Uses == 1
  2518  	&& isSamePtr(ptr0, ptr1)
  2519  	&& clobber(x)
  2520  	=> (MOVHstore [i-1] {s} ptr0 w mem)
  2521  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2522  	&& x.Uses == 1
  2523  	&& s == nil
  2524  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2525  	&& clobber(x)
  2526  	=> (MOVHstoreidx ptr1 idx1 w mem)
  2527  (MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
  2528  	&& x.Uses == 1
  2529  	&& clobber(x)
  2530  	=> (MOVHstoreidx ptr idx w mem)
  2531  (MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2532  	&& x.Uses == 1
  2533  	&& isSamePtr(ptr0, ptr1)
  2534  	&& clobber(x)
  2535  	=> (MOVHstore [i-1] {s} ptr0 w mem)
  2536  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2537  	&& x.Uses == 1
  2538  	&& s == nil
  2539  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2540  	&& clobber(x)
  2541  	=> (MOVHstoreidx ptr1 idx1 w mem)
  2542  (MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2543  	&& x.Uses == 1
  2544  	&& isSamePtr(ptr0, ptr1)
  2545  	&& clobber(x)
  2546  	=> (MOVHstore [i-1] {s} ptr0 w mem)
  2547  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2548  	&& x.Uses == 1
  2549  	&& s == nil
  2550  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2551  	&& clobber(x)
  2552  	=> (MOVHstoreidx ptr1 idx1 w mem)
  2553  (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2554  	&& x.Uses == 1
  2555  	&& isSamePtr(ptr0, ptr1)
  2556  	&& clobber(x)
  2557  	=> (MOVHstore [i-1] {s} ptr0 w mem)
  2558  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
  2559  	&& x.Uses == 1
  2560  	&& s == nil
  2561  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2562  	&& clobber(x)
  2563  	=> (MOVHstoreidx ptr1 idx1 w mem)
  2564  (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
  2565  	&& x.Uses == 1
  2566  	&& isSamePtr(ptr0, ptr1)
  2567  	&& clobber(x)
  2568  	=> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2569  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
  2570  	&& x.Uses == 1
  2571  	&& s == nil
  2572  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2573  	&& clobber(x)
  2574  	=> (MOVHstoreidx ptr1 idx1 w0 mem)
// The bfc/bfc2 conditions require both UBFX extracts to run to the top of
// the low 32 bits (width == 32 - lsb) with lsb values 8 apart, i.e. they are
// adjacent right-shifts of the same 32-bit value expressed as bitfields.
  2575  (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
  2576  	&& x.Uses == 1
  2577  	&& isSamePtr(ptr0, ptr1)
  2578  	&& bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb()
  2579  	&& bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb()
  2580  	&& bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8
  2581  	&& clobber(x)
  2582  	=> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2583  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
  2584  	&& x.Uses == 1
  2585  	&& s == nil
  2586  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2587  	&& bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb()
  2588  	&& bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb()
  2589  	&& bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8
  2590  	&& clobber(x)
  2591  	=> (MOVHstoreidx ptr1 idx1 w0 mem)
  2592  (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
  2593  	&& x.Uses == 1
  2594  	&& isSamePtr(ptr0, ptr1)
  2595  	&& clobber(x)
  2596  	=> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2597  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
  2598  	&& x.Uses == 1
  2599  	&& s == nil
  2600  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2601  	&& clobber(x)
  2602  	=> (MOVHstoreidx ptr1 idx1 w0 mem)
// Same scheme, one level up: halfword + halfword -> word.
  2603  (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2604  	&& x.Uses == 1
  2605  	&& isSamePtr(ptr0, ptr1)
  2606  	&& clobber(x)
  2607  	=> (MOVWstore [i-2] {s} ptr0 w mem)
  2608  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
  2609  	&& x.Uses == 1
  2610  	&& s == nil
  2611  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2612  	&& clobber(x)
  2613  	=> (MOVWstoreidx ptr1 idx1 w mem)
  2614  (MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
  2615  	&& x.Uses == 1
  2616  	&& clobber(x)
  2617  	=> (MOVWstoreidx ptr idx w mem)
  2618  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2619  	&& x.Uses == 1
  2620  	&& s == nil
  2621  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2622  	&& clobber(x)
  2623  	=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  2624  (MOVHstore [i] {s} ptr0 (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2625  	&& x.Uses == 1
  2626  	&& isSamePtr(ptr0, ptr1)
  2627  	&& clobber(x)
  2628  	=> (MOVWstore [i-2] {s} ptr0 w mem)
  2629  (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
  2630  	&& x.Uses == 1
  2631  	&& s == nil
  2632  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2633  	&& clobber(x)
  2634  	=> (MOVWstoreidx ptr1 idx1 w mem)
  2635  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2636  	&& x.Uses == 1
  2637  	&& s == nil
  2638  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2639  	&& clobber(x)
  2640  	=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  2641  (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2642  	&& x.Uses == 1
  2643  	&& isSamePtr(ptr0, ptr1)
  2644  	&& clobber(x)
  2645  	=> (MOVWstore [i-2] {s} ptr0 w mem)
  2646  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
  2647  	&& x.Uses == 1
  2648  	&& s == nil
  2649  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2650  	&& clobber(x)
  2651  	=> (MOVWstoreidx ptr1 idx1 w mem)
  2652  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2653  	&& x.Uses == 1
  2654  	&& s == nil
  2655  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2656  	&& clobber(x)
  2657  	=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  2658  (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
  2659  	&& x.Uses == 1
  2660  	&& isSamePtr(ptr0, ptr1)
  2661  	&& clobber(x)
  2662  	=> (MOVWstore [i-2] {s} ptr0 w0 mem)
  2663  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
  2664  	&& x.Uses == 1
  2665  	&& s == nil
  2666  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2667  	&& clobber(x)
  2668  	=> (MOVWstoreidx ptr1 idx1 w0 mem)
  2669  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
  2670  	&& x.Uses == 1
  2671  	&& s == nil
  2672  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2673  	&& clobber(x)
  2674  	=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
// And one more level: word + word -> doubleword.
  2675  (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
  2676  	&& x.Uses == 1
  2677  	&& isSamePtr(ptr0, ptr1)
  2678  	&& clobber(x)
  2679  	=> (MOVDstore [i-4] {s} ptr0 w mem)
  2680  (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
  2681  	&& x.Uses == 1
  2682  	&& s == nil
  2683  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2684  	&& clobber(x)
  2685  	=> (MOVDstoreidx ptr1 idx1 w mem)
  2686  (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
  2687  	&& x.Uses == 1
  2688  	&& clobber(x)
  2689  	=> (MOVDstoreidx ptr idx w mem)
  2690  (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
  2691  	&& x.Uses == 1
  2692  	&& s == nil
  2693  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2694  	&& clobber(x)
  2695  	=> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
  2696  (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
  2697  	&& x.Uses == 1
  2698  	&& isSamePtr(ptr0, ptr1)
  2699  	&& clobber(x)
  2700  	=> (MOVDstore [i-4] {s} ptr0 w0 mem)
  2701  (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
  2702  	&& x.Uses == 1
  2703  	&& s == nil
  2704  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2705  	&& clobber(x)
  2706  	=> (MOVDstoreidx ptr1 idx1 w0 mem)
  2707  (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
  2708  	&& x.Uses == 1
  2709  	&& s == nil
  2710  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2711  	&& clobber(x)
  2712  	=> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
// Eight single-byte stores laying out w from its highest byte (w>>56) at the
// lowest address up to w itself at the highest address -- i.e. big-endian
// byte order -- combine into one doubleword store of the byte-reversed
// value (REV w).
  2713  (MOVBstore [i] {s} ptr w
  2714  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
  2715  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
  2716  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
  2717  	x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
  2718  	x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w)
  2719  	x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w)
  2720  	x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
  2721  	&& x0.Uses == 1
  2722  	&& x1.Uses == 1
  2723  	&& x2.Uses == 1
  2724  	&& x3.Uses == 1
  2725  	&& x4.Uses == 1
  2726  	&& x5.Uses == 1
  2727  	&& x6.Uses == 1
  2728  	&& clobber(x0, x1, x2, x3, x4, x5, x6)
  2729  	=> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
  2730  (MOVBstore [7] {s} p w
  2731  	x0:(MOVBstore [6] {s} p (SRLconst [8] w)
  2732  	x1:(MOVBstore [5] {s} p (SRLconst [16] w)
  2733  	x2:(MOVBstore [4] {s} p (SRLconst [24] w)
  2734  	x3:(MOVBstore [3] {s} p (SRLconst [32] w)
  2735  	x4:(MOVBstore [2] {s} p (SRLconst [40] w)
  2736  	x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w)
  2737  	x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
  2738  	&& x0.Uses == 1
  2739  	&& x1.Uses == 1
  2740  	&& x2.Uses == 1
  2741  	&& x3.Uses == 1
  2742  	&& x4.Uses == 1
  2743  	&& x5.Uses == 1
  2744  	&& x6.Uses == 1
  2745  	&& s == nil
  2746  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2747  	&& isSamePtr(p1, p)
  2748  	&& clobber(x0, x1, x2, x3, x4, x5, x6)
  2749  	=> (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
// Four single-byte stores in big-endian order combine into one word store of
// the byte-reversed value (REVW w). The next groups match the shifted bytes
// expressed as UBFX extracts or as SRLconst of (MOVDreg w).
  2750  (MOVBstore [i] {s} ptr w
  2751  	x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w)
  2752  	x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w)
  2753  	x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2754  	&& x0.Uses == 1
  2755  	&& x1.Uses == 1
  2756  	&& x2.Uses == 1
  2757  	&& clobber(x0, x1, x2)
  2758  	=> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  2759  (MOVBstore [3] {s} p w
  2760  	x0:(MOVBstore [2] {s} p (UBFX [armBFAuxInt(8, 24)] w)
  2761  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [armBFAuxInt(16, 16)] w)
  2762  	x2:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2763  	&& x0.Uses == 1
  2764  	&& x1.Uses == 1
  2765  	&& x2.Uses == 1
  2766  	&& s == nil
  2767  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2768  	&& isSamePtr(p1, p)
  2769  	&& clobber(x0, x1, x2)
  2770  	=> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
  2771  (MOVBstoreidx ptr (ADDconst [3] idx) w
  2772  	x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w)
  2773  	x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w)
  2774  	x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2775  	&& x0.Uses == 1
  2776  	&& x1.Uses == 1
  2777  	&& x2.Uses == 1
  2778  	&& clobber(x0, x1, x2)
  2779  	=> (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
// Four single-byte stores already in little-endian order combine into a
// plain word store -- no byte reversal needed.
  2780  (MOVBstoreidx ptr idx w
  2781  	x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w)
  2782  	x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w)
  2783  	x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2784  	&& x0.Uses == 1
  2785  	&& x1.Uses == 1
  2786  	&& x2.Uses == 1
  2787  	&& clobber(x0, x1, x2)
  2788  	=> (MOVWstoreidx ptr idx w mem)
  2789  (MOVBstore [i] {s} ptr w
  2790  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
  2791  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
  2792  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
  2793  	&& x0.Uses == 1
  2794  	&& x1.Uses == 1
  2795  	&& x2.Uses == 1
  2796  	&& clobber(x0, x1, x2)
  2797  	=> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  2798  (MOVBstore [3] {s} p w
  2799  	x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w))
  2800  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w))
  2801  	x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
  2802  	&& x0.Uses == 1
  2803  	&& x1.Uses == 1
  2804  	&& x2.Uses == 1
  2805  	&& s == nil
  2806  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2807  	&& isSamePtr(p1, p)
  2808  	&& clobber(x0, x1, x2)
  2809  	=> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
  2810  (MOVBstore [i] {s} ptr w
  2811  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
  2812  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
  2813  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
  2814  	&& x0.Uses == 1
  2815  	&& x1.Uses == 1
  2816  	&& x2.Uses == 1
  2817  	&& clobber(x0, x1, x2)
  2818  	=> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  2819  (MOVBstore [3] {s} p w
  2820  	x0:(MOVBstore [2] {s} p (SRLconst [8] w)
  2821  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w)
  2822  	x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
  2823  	&& x0.Uses == 1
  2824  	&& x1.Uses == 1
  2825  	&& x2.Uses == 1
  2826  	&& s == nil
  2827  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2828  	&& isSamePtr(p1, p)
  2829  	&& clobber(x0, x1, x2)
  2830  	=> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
// Two single-byte stores in big-endian order combine into a halfword store
// of the 16-bit byte-swapped value (REV16W w).
  2831  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
  2832  	&& x.Uses == 1
  2833  	&& clobber(x)
  2834  	=> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2835  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
  2836  	&& x.Uses == 1
  2837  	&& s == nil
  2838  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2839  	&& clobber(x)
  2840  	=> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2841  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 8)] w) mem))
  2842  	&& x.Uses == 1
  2843  	&& clobber(x)
  2844  	=> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2845  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 8)] w) mem))
  2846  	&& x.Uses == 1
  2847  	&& s == nil
  2848  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2849  	&& clobber(x)
  2850  	=> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2851  (MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(8, 8)] w) mem))
  2852  	&& x.Uses == 1
  2853  	&& clobber(x)
  2854  	=> (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
// Two single-byte stores already in little-endian order combine into a
// plain halfword store.
  2855  (MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 8)] w) mem))
  2856  	&& x.Uses == 1
  2857  	&& clobber(x)
  2858  	=> (MOVHstoreidx ptr idx w mem)
  2859  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
  2860  	&& x.Uses == 1
  2861  	&& clobber(x)
  2862  	=> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2863  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
  2864  	&& x.Uses == 1
  2865  	&& s == nil
  2866  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2867  	&& clobber(x)
  2868  	=> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2869  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) mem))
  2870  	&& x.Uses == 1
  2871  	&& clobber(x)
  2872  	=> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2873  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 24)] w) mem))
  2874  	&& x.Uses == 1
  2875  	&& s == nil
  2876  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2877  	&& clobber(x)
  2878  	=> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2879  
  2880  // FP simplification
// A negate feeding (or fed by) a multiply folds into the single negated-
// multiply instruction FNMULS/FNMULD, and negating a negated multiply
// cancels back to a plain multiply.
  2881  (FNEGS (FMULS x y)) => (FNMULS x y)
  2882  (FNEGD (FMULD x y)) => (FNMULD x y)
  2883  (FMULS (FNEGS x) y) => (FNMULS x y)
  2884  (FMULD (FNEGD x) y) => (FNMULD x y)
  2885  (FNEGS (FNMULS x y)) => (FMULS x y)
  2886  (FNEGD (FNMULD x y)) => (FMULD x y)
  2887  (FNMULS (FNEGS x) y) => (FMULS x y)
  2888  (FNMULD (FNEGD x) y) => (FMULD x y)
// A multiply (or negated multiply) feeding an add or subtract folds into
// the fused multiply-add family: a+x*y -> FMADD, a-x*y -> FMSUB,
// x*y-a -> FNMSUB, and the FNMUL-fed duals map to the opposite forms.
  2889  (FADDS a (FMULS x y)) => (FMADDS a x y)
  2890  (FADDD a (FMULD x y)) => (FMADDD a x y)
  2891  (FSUBS a (FMULS x y)) => (FMSUBS a x y)
  2892  (FSUBD a (FMULD x y)) => (FMSUBD a x y)
  2893  (FSUBS (FMULS x y) a) => (FNMSUBS a x y)
  2894  (FSUBD (FMULD x y) a) => (FNMSUBD a x y)
  2895  (FADDS a (FNMULS x y)) => (FMSUBS a x y)
  2896  (FADDD a (FNMULD x y)) => (FMSUBD a x y)
  2897  (FSUBS a (FNMULS x y)) => (FMADDS a x y)
  2898  (FSUBD a (FNMULD x y)) => (FMADDD a x y)
  2899  (FSUBS (FNMULS x y) a) => (FNMADDS a x y)
  2900  (FSUBD (FNMULD x y) a) => (FNMADDD a x y)
  2901  
  // Unsigned loads from read-only symbols (symIsRO) fold to constants at
  // compile time, reading the object data in the target's byte order.
  2902  (MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read8(sym, int64(off)))])
  2903  (MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
  2904  (MOVWUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
  2905  (MOVDload  [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))])
  2906  
  2907  // Prefetch instructions (aux is option: 0 - PLDL1KEEP; 1 - PLDL1STRM)
  2908  (PrefetchCache addr mem)         => (PRFM [0] addr mem)
  2909  (PrefetchCacheStreamed addr mem) => (PRFM [1] addr mem)
  2910  
  2911  // Arch-specific inlining for small or disjoint runtime.memmove
// Pre-regabi form: the call receives its arguments through three stack
// stores (dst, src, size). All three stores and the call itself must have
// no other uses so they can be clobbered.
  2912  (SelectN [0] call:(CALLstatic {sym} s1:(MOVDstore _ (MOVDconst [sz]) s2:(MOVDstore  _ src s3:(MOVDstore {t} _ dst mem)))))
  2913  	&& sz >= 0
  2914  	&& isSameCall(sym, "runtime.memmove")
  2915  	&& s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
  2916  	&& isInlinableMemmove(dst, src, sz, config)
  2917  	&& clobber(s1, s2, s3, call)
  2918  	=> (Move [sz] dst src mem)
  2919  
  2920  // Match post-lowering calls, register version.
  2921  (SelectN [0] call:(CALLstatic {sym} dst src (MOVDconst [sz]) mem))
  2922  	&& sz >= 0
  2923  	&& isSameCall(sym, "runtime.memmove")
  2924  	&& call.Uses == 1
  2925  	&& isInlinableMemmove(dst, src, sz, config)
  2926  	&& clobber(call)
  2927  	=> (Move [sz] dst src mem)
  2928  
// Byte-reversing twice is the identity. The alternation expands in lockstep:
// REV cancels REV, and REVW cancels REVW.
  2929  ((REV|REVW) ((REV|REVW) p)) => p
  2930  

View as plain text