// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Optimizations TODO:
// * Use SLTI and SLTIU for comparisons to constants, instead of SLT/SLTU with constants in registers
// * Use the zero register instead of moving 0 into a register.
// * Add rules to avoid generating a temp bool value for (If (SLT[U] ...) ...).
// * Arrange for non-trivial Zero and Move lowerings to use aligned loads and stores.
// * Avoid using Neq32 for writeBarrier.enabled checks.

// Lowering arithmetic
(Add64 ...) => (ADD ...)
(AddPtr ...) => (ADD ...)
(Add32 ...) => (ADD ...)
(Add16 ...) => (ADD ...)
(Add8 ...) => (ADD ...)
(Add32F ...) => (FADDS ...)
(Add64F ...) => (FADDD ...)

(Sub64 ...) => (SUB ...)
(SubPtr ...) => (SUB ...)
(Sub32 ...) => (SUB ...)
(Sub16 ...) => (SUB ...)
(Sub8 ...) => (SUB ...)
(Sub32F ...) => (FSUBS ...)
(Sub64F ...) => (FSUBD ...)

(Mul64 ...) => (MUL ...)
(Mul64uhilo ...) => (LoweredMuluhilo ...)
(Mul64uover ...) => (LoweredMuluover ...)
(Mul32 ...) => (MULW ...)
(Mul16 x y) => (MULW (SignExt16to32 x) (SignExt16to32 y))
(Mul8 x y) => (MULW (SignExt8to32 x) (SignExt8to32 y))
(Mul32F ...) => (FMULS ...)
(Mul64F ...) => (FMULD ...)

(Div32F ...) => (FDIVS ...)
(Div64F ...) => (FDIVD ...)

(Div64 x y [false]) => (DIV x y)
(Div64u ...) => (DIVU ...)
(Div32 x y [false]) => (DIVW x y)
(Div32u ...) => (DIVUW ...)
(Div16 x y [false]) => (DIVW (SignExt16to32 x) (SignExt16to32 y))
(Div16u x y) => (DIVUW (ZeroExt16to32 x) (ZeroExt16to32 y))
(Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y))
(Div8u x y) => (DIVUW (ZeroExt8to32 x) (ZeroExt8to32 y))

(Hmul64 ...) => (MULH ...)
(Hmul64u ...) => (MULHU ...)
(Hmul32 x y) => (SRAI [32] (MUL (SignExt32to64 x) (SignExt32to64 y)))
(Hmul32u x y) => (SRLI [32] (MUL (ZeroExt32to64 x) (ZeroExt32to64 y)))

// (x + y) / 2 => (x / 2) + (y / 2) + (x & y & 1)
(Avg64u <t> x y) => (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y)))
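// The rule above is the same identity computed branch-free; a rough Go
// equivalent (illustrative sketch only, not part of the rules):
//
//	func avg64u(x, y uint64) uint64 {
//		return x/2 + y/2 + (x & y & 1) // == (x + y) / 2, without overflowing
//	}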

(Mod64 x y [false]) => (REM x y)
(Mod64u ...) => (REMU ...)
(Mod32 x y [false]) => (REMW x y)
(Mod32u ...) => (REMUW ...)
(Mod16 x y [false]) => (REMW (SignExt16to32 x) (SignExt16to32 y))
(Mod16u x y) => (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))

(And64 ...) => (AND ...)
(And32 ...) => (AND ...)
(And16 ...) => (AND ...)
(And8 ...) => (AND ...)

(Or64 ...) => (OR ...)
(Or32 ...) => (OR ...)
(Or16 ...) => (OR ...)
(Or8 ...) => (OR ...)

(Xor64 ...) => (XOR ...)
(Xor32 ...) => (XOR ...)
(Xor16 ...) => (XOR ...)
(Xor8 ...) => (XOR ...)

(Neg64 ...) => (NEG ...)
(Neg32 ...) => (NEG ...)
(Neg16 ...) => (NEG ...)
(Neg8 ...) => (NEG ...)
(Neg32F ...) => (FNEGS ...)
(Neg64F ...) => (FNEGD ...)

(Com64 ...) => (NOT ...)
(Com32 ...) => (NOT ...)
(Com16 ...) => (NOT ...)
(Com8 ...) => (NOT ...)

(Sqrt ...) => (FSQRTD ...)
(Sqrt32 ...) => (FSQRTS ...)

(Copysign ...) => (FSGNJD ...)

(Abs ...) => (FABSD ...)

(FMA ...) => (FMADDD ...)

// Sign and zero extension.

(SignExt8to16 ...) => (MOVBreg ...)
(SignExt8to32 ...) => (MOVBreg ...)
(SignExt8to64 ...) => (MOVBreg ...)
(SignExt16to32 ...) => (MOVHreg ...)
(SignExt16to64 ...) => (MOVHreg ...)
(SignExt32to64 ...) => (MOVWreg ...)

(ZeroExt8to16 ...) => (MOVBUreg ...)
(ZeroExt8to32 ...) => (MOVBUreg ...)
(ZeroExt8to64 ...) => (MOVBUreg ...)
(ZeroExt16to32 ...) => (MOVHUreg ...)
(ZeroExt16to64 ...) => (MOVHUreg ...)
(ZeroExt32to64 ...) => (MOVWUreg ...)

(Cvt32to32F ...) => (FCVTSW ...)
(Cvt32to64F ...) => (FCVTDW ...)
(Cvt64to32F ...) => (FCVTSL ...)
(Cvt64to64F ...) => (FCVTDL ...)

(Cvt32Fto32 ...) => (FCVTWS ...)
(Cvt32Fto64 ...) => (FCVTLS ...)
(Cvt64Fto32 ...) => (FCVTWD ...)
(Cvt64Fto64 ...) => (FCVTLD ...)

(Cvt32Fto64F ...) => (FCVTDS ...)
(Cvt64Fto32F ...) => (FCVTSD ...)

(CvtBoolToUint8 ...) => (Copy ...)

(Round32F ...) => (Copy ...)
(Round64F ...) => (Copy ...)

// From genericOps.go:
// "0 if arg0 == 0, -1 if arg0 > 0, undef if arg0<0"
//
// Like other arches, we compute ~((x-1) >> 63), with arithmetic right shift.
// For positive x, bit 63 of x-1 is always 0, so the result is -1.
// For zero x, bit 63 of x-1 is 1, so the result is 0.
//
(Slicemask <t> x) => (NOT (SRAI <t> [63] (ADDI <t> [-1] x)))
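// Worked example of the rule above (illustrative sketch only):
//
//	func slicemask(x int64) int64 {
//		return ^((x - 1) >> 63) // arithmetic shift
//	}
//
//	slicemask(0) == ^((-1) >> 63) == ^(-1) == 0
//	slicemask(5) == ^(4 >> 63)    == ^0    == -1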

// Truncations
// We ignore the unused high parts of registers, so truncates are just copies.
(Trunc16to8 ...) => (Copy ...)
(Trunc32to8 ...) => (Copy ...)
(Trunc32to16 ...) => (Copy ...)
(Trunc64to8 ...) => (Copy ...)
(Trunc64to16 ...) => (Copy ...)
(Trunc64to32 ...) => (Copy ...)

// Shifts

// SLL only considers the bottom 6 bits of y. If y >= 64, the result should
// always be 0.
//
// Breaking down the operation:
//
// (SLL x y) generates x << (y & 63).
//
// If y < 64, this is the value we want. Otherwise, we want zero.
//
// So, we AND with -1 * uint64(y < 64), which is 0xfffff... if y < 64 and 0 otherwise.
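//
// A rough Go equivalent of the pattern used below (illustrative sketch only;
// the helper name is made up):
//
//	func lsh(x, y uint64) uint64 {
//		var keep uint64
//		if y < 64 {
//			keep = ^uint64(0) // all ones
//		}
//		return (x << (y & 63)) & keep
//	}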
(Lsh8x8 <t> x y) => (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Lsh8x16 <t> x y) => (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Lsh8x32 <t> x y) => (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Lsh8x64 <t> x y) => (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] y)))
(Lsh16x8 <t> x y) => (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Lsh16x16 <t> x y) => (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Lsh16x32 <t> x y) => (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Lsh16x64 <t> x y) => (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] y)))
(Lsh32x8 <t> x y) => (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Lsh32x16 <t> x y) => (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Lsh32x32 <t> x y) => (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Lsh32x64 <t> x y) => (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] y)))
(Lsh64x8 <t> x y) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Lsh64x16 <t> x y) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Lsh64x32 <t> x y) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Lsh64x64 <t> x y) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))

// SRL only considers the bottom 6 bits of y. If y >= 64, the result should
// always be 0. See Lsh above for a detailed description.
(Rsh8Ux8 <t> x y) => (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Rsh8Ux16 <t> x y) => (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Rsh8Ux32 <t> x y) => (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Rsh8Ux64 <t> x y) => (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] y)))
(Rsh16Ux8 <t> x y) => (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Rsh16Ux16 <t> x y) => (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Rsh16Ux32 <t> x y) => (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Rsh16Ux64 <t> x y) => (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] y)))
(Rsh32Ux8 <t> x y) => (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Rsh32Ux16 <t> x y) => (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Rsh32Ux32 <t> x y) => (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Rsh32Ux64 <t> x y) => (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] y)))
(Rsh64Ux8 <t> x y) => (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
(Rsh64Ux16 <t> x y) => (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
(Rsh64Ux32 <t> x y) => (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
(Rsh64Ux64 <t> x y) => (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))

// SRA only considers the bottom 6 bits of y. If y >= 64, the result should
// be either 0 or -1 based on the sign bit.
//
// We implement this by performing the maximum shift (a shift amount of -1, which masks to 63) if y >= 64.
//
// We OR (uint64(y < 64) - 1) into y before passing it to SRA. This leaves
// us with -1 (0xffff...) if y >= 64.
//
// We don't need to sign-extend the OR result, as it will be at minimum 8 bits,
// more than the 6 bits SRA cares about.
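//
// A rough Go equivalent of the intent (the rules below are branch-free,
// OR-ing all ones into y instead; illustrative sketch only):
//
//	func rsh(x int64, y uint64) int64 {
//		if y >= 64 {
//			y = 63 // shifting by 63 already yields 0 or -1
//		}
//		return x >> y
//	}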
(Rsh8x8 <t> x y) => (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
(Rsh8x16 <t> x y) => (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
(Rsh8x32 <t> x y) => (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
(Rsh8x64 <t> x y) => (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
(Rsh16x8 <t> x y) => (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
(Rsh16x16 <t> x y) => (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
(Rsh16x32 <t> x y) => (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
(Rsh16x64 <t> x y) => (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
(Rsh32x8 <t> x y) => (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
(Rsh32x16 <t> x y) => (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
(Rsh32x32 <t> x y) => (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
(Rsh32x64 <t> x y) => (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
(Rsh64x8 <t> x y) => (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
(Rsh64x16 <t> x y) => (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
(Rsh64x32 <t> x y) => (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
(Rsh64x64 <t> x y) => (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))

// Rotates.
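// A rotate by constant is decomposed into two shifts and an OR. For example,
// for 8 bits (illustrative sketch only):
//
//	func rotateLeft8(x uint8, c uint) uint8 {
//		return x<<(c&7) | x>>(-c&7)
//	}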
(RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
(RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
(RotateLeft32 <t> x (MOVDconst [c])) => (Or32 (Lsh32x64 <t> x (MOVDconst [c&31])) (Rsh32Ux64 <t> x (MOVDconst [-c&31])))
(RotateLeft64 <t> x (MOVDconst [c])) => (Or64 (Lsh64x64 <t> x (MOVDconst [c&63])) (Rsh64Ux64 <t> x (MOVDconst [-c&63])))

(Less64 ...) => (SLT ...)
(Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y))
(Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y))
(Less8 x y) => (SLT (SignExt8to64 x) (SignExt8to64 y))
(Less64U ...) => (SLTU ...)
(Less32U x y) => (SLTU (ZeroExt32to64 x) (ZeroExt32to64 y))
(Less16U x y) => (SLTU (ZeroExt16to64 x) (ZeroExt16to64 y))
(Less8U x y) => (SLTU (ZeroExt8to64 x) (ZeroExt8to64 y))
(Less64F ...) => (FLTD ...)
(Less32F ...) => (FLTS ...)

// Convert x <= y to !(y > x).
(Leq64 x y) => (Not (Less64 y x))
(Leq32 x y) => (Not (Less32 y x))
(Leq16 x y) => (Not (Less16 y x))
(Leq8 x y) => (Not (Less8 y x))
(Leq64U x y) => (Not (Less64U y x))
(Leq32U x y) => (Not (Less32U y x))
(Leq16U x y) => (Not (Less16U y x))
(Leq8U x y) => (Not (Less8U y x))
(Leq64F ...) => (FLED ...)
(Leq32F ...) => (FLES ...)

(EqPtr x y) => (SEQZ (SUB <typ.Uintptr> x y))
(Eq64 x y) => (SEQZ (SUB <x.Type> x y))
(Eq32 x y) => (SEQZ (SUB <x.Type> (ZeroExt32to64 x) (ZeroExt32to64 y)))
(Eq16 x y) => (SEQZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
(Eq8 x y) => (SEQZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Eq64F ...) => (FEQD ...)
(Eq32F ...) => (FEQS ...)

(NeqPtr x y) => (SNEZ (SUB <typ.Uintptr> x y))
(Neq64 x y) => (SNEZ (SUB <x.Type> x y))
(Neq32 x y) => (SNEZ (SUB <x.Type> (ZeroExt32to64 x) (ZeroExt32to64 y)))
(Neq16 x y) => (SNEZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
(Neq8 x y) => (SNEZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Neq64F ...) => (FNED ...)
(Neq32F ...) => (FNES ...)

// Loads
(Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
(Load <t> ptr mem) && ( is8BitInt(t) && isSigned(t)) => (MOVBload ptr mem)
(Load <t> ptr mem) && ( is8BitInt(t) && !isSigned(t)) => (MOVBUload ptr mem)
(Load <t> ptr mem) && (is16BitInt(t) && isSigned(t)) => (MOVHload ptr mem)
(Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) => (MOVHUload ptr mem)
(Load <t> ptr mem) && (is32BitInt(t) && isSigned(t)) => (MOVWload ptr mem)
(Load <t> ptr mem) && (is32BitInt(t) && !isSigned(t)) => (MOVWUload ptr mem)
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVDload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) => (FMOVWload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) => (FMOVDload ptr mem)

// Stores
(Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVDstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)

// We need to fold MOVaddr into the LD/MOVDstore ops so that the live variable analysis
// knows what variables are being read/written by the ops.
(MOVBUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVBUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVHUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVHUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVHload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)

(MOVBstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVHstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
	(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVBstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
	(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
	(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
	(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
	(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)

(MOVBUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
	(MOVBUload [off1+int32(off2)] {sym} base mem)
(MOVBload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
	(MOVBload [off1+int32(off2)] {sym} base mem)
(MOVHUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
	(MOVHUload [off1+int32(off2)] {sym} base mem)
(MOVHload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
	(MOVHload [off1+int32(off2)] {sym} base mem)
(MOVWUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
	(MOVWUload [off1+int32(off2)] {sym} base mem)
(MOVWload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
	(MOVWload [off1+int32(off2)] {sym} base mem)
(MOVDload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
	(MOVDload [off1+int32(off2)] {sym} base mem)

(MOVBstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
	(MOVBstore [off1+int32(off2)] {sym} base val mem)
(MOVHstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
	(MOVHstore [off1+int32(off2)] {sym} base val mem)
(MOVWstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
	(MOVWstore [off1+int32(off2)] {sym} base val mem)
(MOVDstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
	(MOVDstore [off1+int32(off2)] {sym} base val mem)
(MOVBstorezero [off1] {sym} (ADDI [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVBstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVHstorezero [off1] {sym} (ADDI [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVHstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVWstorezero [off1] {sym} (ADDI [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVWstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVDstorezero [off1] {sym} (ADDI [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVDstorezero [off1+int32(off2)] {sym} ptr mem)

// Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis
// with OffPtr -> ADDI.
(ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x)

// Small zeroing
(Zero [0] _ mem) => mem
(Zero [1] ptr mem) => (MOVBstore ptr (MOVDconst [0]) mem)
(Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 =>
	(MOVHstore ptr (MOVDconst [0]) mem)
(Zero [2] ptr mem) =>
	(MOVBstore [1] ptr (MOVDconst [0])
		(MOVBstore ptr (MOVDconst [0]) mem))
(Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 =>
	(MOVWstore ptr (MOVDconst [0]) mem)
(Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [2] ptr (MOVDconst [0])
		(MOVHstore ptr (MOVDconst [0]) mem))
(Zero [4] ptr mem) =>
	(MOVBstore [3] ptr (MOVDconst [0])
		(MOVBstore [2] ptr (MOVDconst [0])
			(MOVBstore [1] ptr (MOVDconst [0])
				(MOVBstore ptr (MOVDconst [0]) mem))))
(Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 =>
	(MOVDstore ptr (MOVDconst [0]) mem)
(Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 =>
	(MOVWstore [4] ptr (MOVDconst [0])
		(MOVWstore ptr (MOVDconst [0]) mem))
(Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [6] ptr (MOVDconst [0])
		(MOVHstore [4] ptr (MOVDconst [0])
			(MOVHstore [2] ptr (MOVDconst [0])
				(MOVHstore ptr (MOVDconst [0]) mem))))

(Zero [3] ptr mem) =>
	(MOVBstore [2] ptr (MOVDconst [0])
		(MOVBstore [1] ptr (MOVDconst [0])
			(MOVBstore ptr (MOVDconst [0]) mem)))
(Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [4] ptr (MOVDconst [0])
		(MOVHstore [2] ptr (MOVDconst [0])
			(MOVHstore ptr (MOVDconst [0]) mem)))
(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
	(MOVWstore [8] ptr (MOVDconst [0])
		(MOVWstore [4] ptr (MOVDconst [0])
			(MOVWstore ptr (MOVDconst [0]) mem)))
(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
	(MOVDstore [8] ptr (MOVDconst [0])
		(MOVDstore ptr (MOVDconst [0]) mem))
(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 =>
	(MOVDstore [16] ptr (MOVDconst [0])
		(MOVDstore [8] ptr (MOVDconst [0])
			(MOVDstore ptr (MOVDconst [0]) mem)))
(Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 =>
	(MOVDstore [24] ptr (MOVDconst [0])
		(MOVDstore [16] ptr (MOVDconst [0])
			(MOVDstore [8] ptr (MOVDconst [0])
				(MOVDstore ptr (MOVDconst [0]) mem))))

// Medium 8-aligned zeroing uses a Duff's device
// 8 and 128 are magic constants, see runtime/mkduff.go
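// The offset selects how many of DUFFZERO's unrolled 8-byte stores actually
// run; assuming each unrolled store is 8 bytes of code (per runtime/mkduff.go),
// the calculation is roughly:
//
//	offset := 8 * (128 - s/8) // skip the stores that aren't needed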
(Zero [s] {t} ptr mem)
	&& s%8 == 0 && s <= 8*128
	&& t.Alignment()%8 == 0 && !config.noDuffDevice =>
	(DUFFZERO [8 * (128 - s/8)] ptr mem)

// Generic zeroing uses a loop
(Zero [s] {t} ptr mem) =>
	(LoweredZero [t.Alignment()]
		ptr
		(ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)]))
		mem)

(Convert ...) => (MOVconvert ...)

// Checks
(IsNonNil ...) => (SNEZ ...)
(IsInBounds ...) => (Less64U ...)
(IsSliceInBounds ...) => (Leq64U ...)

// Trivial lowering
(NilCheck ...) => (LoweredNilCheck ...)
(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
(GetCallerSP ...) => (LoweredGetCallerSP ...)
(GetCallerPC ...) => (LoweredGetCallerPC ...)

// Write barrier.
(WB ...) => (LoweredWB ...)

(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)

// Small moves
(Move [0] _ _ mem) => mem
(Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem)
(Move [2] {t} dst src mem) && t.Alignment()%2 == 0 =>
	(MOVHstore dst (MOVHload src mem) mem)
(Move [2] dst src mem) =>
	(MOVBstore [1] dst (MOVBload [1] src mem)
		(MOVBstore dst (MOVBload src mem) mem))
(Move [4] {t} dst src mem) && t.Alignment()%4 == 0 =>
	(MOVWstore dst (MOVWload src mem) mem)
(Move [4] {t} dst src mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [2] dst (MOVHload [2] src mem)
		(MOVHstore dst (MOVHload src mem) mem))
(Move [4] dst src mem) =>
	(MOVBstore [3] dst (MOVBload [3] src mem)
		(MOVBstore [2] dst (MOVBload [2] src mem)
			(MOVBstore [1] dst (MOVBload [1] src mem)
				(MOVBstore dst (MOVBload src mem) mem))))
(Move [8] {t} dst src mem) && t.Alignment()%8 == 0 =>
	(MOVDstore dst (MOVDload src mem) mem)
(Move [8] {t} dst src mem) && t.Alignment()%4 == 0 =>
	(MOVWstore [4] dst (MOVWload [4] src mem)
		(MOVWstore dst (MOVWload src mem) mem))
(Move [8] {t} dst src mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [6] dst (MOVHload [6] src mem)
		(MOVHstore [4] dst (MOVHload [4] src mem)
			(MOVHstore [2] dst (MOVHload [2] src mem)
				(MOVHstore dst (MOVHload src mem) mem))))

(Move [3] dst src mem) =>
	(MOVBstore [2] dst (MOVBload [2] src mem)
		(MOVBstore [1] dst (MOVBload [1] src mem)
			(MOVBstore dst (MOVBload src mem) mem)))
(Move [6] {t} dst src mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [4] dst (MOVHload [4] src mem)
		(MOVHstore [2] dst (MOVHload [2] src mem)
			(MOVHstore dst (MOVHload src mem) mem)))
(Move [12] {t} dst src mem) && t.Alignment()%4 == 0 =>
	(MOVWstore [8] dst (MOVWload [8] src mem)
		(MOVWstore [4] dst (MOVWload [4] src mem)
			(MOVWstore dst (MOVWload src mem) mem)))
(Move [16] {t} dst src mem) && t.Alignment()%8 == 0 =>
	(MOVDstore [8] dst (MOVDload [8] src mem)
		(MOVDstore dst (MOVDload src mem) mem))
(Move [24] {t} dst src mem) && t.Alignment()%8 == 0 =>
	(MOVDstore [16] dst (MOVDload [16] src mem)
		(MOVDstore [8] dst (MOVDload [8] src mem)
			(MOVDstore dst (MOVDload src mem) mem)))
(Move [32] {t} dst src mem) && t.Alignment()%8 == 0 =>
	(MOVDstore [24] dst (MOVDload [24] src mem)
		(MOVDstore [16] dst (MOVDload [16] src mem)
			(MOVDstore [8] dst (MOVDload [8] src mem)
				(MOVDstore dst (MOVDload src mem) mem))))

// Medium 8-aligned move uses a Duff's device
// 16 and 128 are magic constants, see runtime/mkduff.go
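// As with DUFFZERO above, the offset skips the unrolled iterations that aren't
// needed; each load/store iteration is assumed to be 16 bytes of code, hence:
//
//	offset := 16 * (128 - s/8)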
(Move [s] {t} dst src mem)
	&& s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0
	&& !config.noDuffDevice && logLargeCopy(v, s) =>
	(DUFFCOPY [16 * (128 - s/8)] dst src mem)

// Generic move uses a loop
(Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
	(LoweredMove [t.Alignment()]
		dst
		src
		(ADDI <src.Type> [s-moveSize(t.Alignment(), config)] src)
		mem)

// Boolean ops; 0=false, 1=true
(AndB ...) => (AND ...)
(OrB ...) => (OR ...)
(EqB x y) => (SEQZ (XOR <typ.Bool> x y))
(NeqB ...) => (XOR ...)
(Not ...) => (SEQZ ...)

// Lowering pointer arithmetic
// TODO: Special handling for SP offsets, like ARM
(OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVaddr [int32(off)] ptr)
(OffPtr [off] ptr) && is32Bit(off) => (ADDI [off] ptr)
(OffPtr [off] ptr) => (ADD (MOVDconst [off]) ptr)

(Const8 [val]) => (MOVDconst [int64(val)])
(Const16 [val]) => (MOVDconst [int64(val)])
(Const32 [val]) => (MOVDconst [int64(val)])
(Const64 [val]) => (MOVDconst [int64(val)])
(Const32F [val]) => (FMVSX (MOVDconst [int64(math.Float32bits(val))]))
(Const64F [val]) => (FMVDX (MOVDconst [int64(math.Float64bits(val))]))
(ConstNil) => (MOVDconst [0])
(ConstBool [val]) => (MOVDconst [int64(b2i(val))])

(Addr {sym} base) => (MOVaddr {sym} [0] base)
(LocalAddr {sym} base _) => (MOVaddr {sym} base)

// Calls
(StaticCall ...) => (CALLstatic ...)
(ClosureCall ...) => (CALLclosure ...)
(InterCall ...) => (CALLinter ...)
(TailCall ...) => (CALLtail ...)

// Atomic Intrinsics
(AtomicLoad8 ...) => (LoweredAtomicLoad8 ...)
(AtomicLoad32 ...) => (LoweredAtomicLoad32 ...)
(AtomicLoad64 ...) => (LoweredAtomicLoad64 ...)
(AtomicLoadPtr ...) => (LoweredAtomicLoad64 ...)

(AtomicStore8 ...) => (LoweredAtomicStore8 ...)
(AtomicStore32 ...) => (LoweredAtomicStore32 ...)
(AtomicStore64 ...) => (LoweredAtomicStore64 ...)
(AtomicStorePtrNoWB ...) => (LoweredAtomicStore64 ...)

(AtomicAdd32 ...) => (LoweredAtomicAdd32 ...)
(AtomicAdd64 ...) => (LoweredAtomicAdd64 ...)

// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3, ^((uint8(val) ^ 0xff) << ((ptr & 3) * 8)))
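// A rough Go sketch of the mask construction (illustrative only; assumes
// little-endian byte numbering within the 32-bit word):
//
//	func atomicAnd8Args(p uintptr, val uint8) (word uintptr, mask uint32) {
//		shift := (p & 3) * 8
//		// val in the target byte, 0xff (a no-op for AND) in every other byte.
//		return p &^ 3, ^((uint32(val) ^ 0xff) << shift)
//	}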
(AtomicAnd8 ptr val mem) =>
	(LoweredAtomicAnd32 (ANDI <typ.Uintptr> [^3] ptr)
		(NOT <typ.UInt32> (SLL <typ.UInt32> (XORI <typ.UInt32> [0xff] (ZeroExt8to32 val))
			(SLLI <typ.UInt64> [3] (ANDI <typ.UInt64> [3] ptr)))) mem)

(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...)

(AtomicCompareAndSwap32 ...) => (LoweredAtomicCas32 ...)
(AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...)

(AtomicExchange32 ...) => (LoweredAtomicExchange32 ...)
(AtomicExchange64 ...) => (LoweredAtomicExchange64 ...)

// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3, uint32(val)<<((ptr&3)*8))
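// The OR case is simpler: val is shifted into its byte lane, and the other
// bytes are OR-ed with zero, leaving them unchanged (sketch only):
//
//	mask := uint32(val) << ((p & 3) * 8)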
(AtomicOr8 ptr val mem) =>
	(LoweredAtomicOr32 (ANDI <typ.Uintptr> [^3] ptr)
		(SLL <typ.UInt32> (ZeroExt8to32 val)
			(SLLI <typ.UInt64> [3] (ANDI <typ.UInt64> [3] ptr))) mem)

(AtomicOr32 ...) => (LoweredAtomicOr32 ...)

// Conditional branches
(If cond yes no) => (BNEZ cond yes no)

// Optimizations

// Absorb SEQZ/SNEZ into branch.
(BEQZ (SEQZ x) yes no) => (BNEZ x yes no)
(BEQZ (SNEZ x) yes no) => (BEQZ x yes no)
(BNEZ (SEQZ x) yes no) => (BEQZ x yes no)
(BNEZ (SNEZ x) yes no) => (BNEZ x yes no)

// Absorb NEG into branch when possible.
(BEQZ x:(NEG y) yes no) && x.Uses == 1 => (BEQZ y yes no)
(BNEZ x:(NEG y) yes no) && x.Uses == 1 => (BNEZ y yes no)

// Convert BEQZ/BNEZ into more optimal branch conditions.
(BEQZ (SUB x y) yes no) => (BEQ x y yes no)
(BNEZ (SUB x y) yes no) => (BNE x y yes no)
(BEQZ (SLT x y) yes no) => (BGE x y yes no)
(BNEZ (SLT x y) yes no) => (BLT x y yes no)
(BEQZ (SLTU x y) yes no) => (BGEU x y yes no)
(BNEZ (SLTU x y) yes no) => (BLTU x y yes no)

// Convert branches with a zero constant operand to the more optimal compare-with-zero branch forms.
(BEQ (MOVDconst [0]) cond yes no) => (BEQZ cond yes no)
(BEQ cond (MOVDconst [0]) yes no) => (BEQZ cond yes no)
(BNE (MOVDconst [0]) cond yes no) => (BNEZ cond yes no)
(BNE cond (MOVDconst [0]) yes no) => (BNEZ cond yes no)
(BLT (MOVDconst [0]) cond yes no) => (BGTZ cond yes no)
(BLT cond (MOVDconst [0]) yes no) => (BLTZ cond yes no)
(BGE (MOVDconst [0]) cond yes no) => (BLEZ cond yes no)
(BGE cond (MOVDconst [0]) yes no) => (BGEZ cond yes no)

// Store zero
(MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem)
(MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem)
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem)

// Avoid sign/zero extension for consts.
(MOVBreg (MOVDconst [c])) => (MOVDconst [int64(int8(c))])
(MOVHreg (MOVDconst [c])) => (MOVDconst [int64(int16(c))])
(MOVWreg (MOVDconst [c])) => (MOVDconst [int64(int32(c))])
(MOVBUreg (MOVDconst [c])) => (MOVDconst [int64(uint8(c))])
(MOVHUreg (MOVDconst [c])) => (MOVDconst [int64(uint16(c))])
(MOVWUreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))])

// Avoid sign/zero extension after properly typed load.
(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)

// Fold double extensions.
(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)

// Do not extend before store.
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)

// Replace extend after load with alternate load where possible.
(MOVBreg <t> x:(MOVBUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <t> [off] {sym} ptr mem)
(MOVHreg <t> x:(MOVHUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHload <t> [off] {sym} ptr mem)
(MOVWreg <t> x:(MOVWUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <t> [off] {sym} ptr mem)
(MOVBUreg <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBUload <t> [off] {sym} ptr mem)
(MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem)
(MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem)

// If a register move has only 1 use, just use the same register without emitting an instruction.
// MOVDnop does not emit an instruction; it exists only to preserve the type.
(MOVDreg x) && x.Uses == 1 => (MOVDnop x)

// TODO: we should be able to get rid of MOVDnop altogether.
// But for now, this is enough to get rid of lots of them.
(MOVDnop (MOVDconst [c])) => (MOVDconst [c])

// Fold constant into immediate instructions where possible.
(ADD (MOVDconst [val]) x) && is32Bit(val) => (ADDI [val] x)
(AND (MOVDconst [val]) x) && is32Bit(val) => (ANDI [val] x)
(OR (MOVDconst [val]) x) && is32Bit(val) => (ORI [val] x)
(XOR (MOVDconst [val]) x) && is32Bit(val) => (XORI [val] x)
(SLL x (MOVDconst [val])) => (SLLI [int64(val&63)] x)
(SRL x (MOVDconst [val])) => (SRLI [int64(val&63)] x)
(SRA x (MOVDconst [val])) => (SRAI [int64(val&63)] x)

// Convert subtraction of a const into ADDI with negative immediate, where possible.
(SUB x (MOVDconst [val])) && is32Bit(-val) => (ADDI [-val] x)

// Subtraction of zero.
(SUB x (MOVDconst [0])) => x
(SUBW x (MOVDconst [0])) => (ADDIW [0] x)

// Subtraction from zero.
(SUB (MOVDconst [0]) x) => (NEG x)
(SUBW (MOVDconst [0]) x) => (NEGW x)

// Addition of zero or two constants.
(ADDI [0] x) => x
(ADDI [x] (MOVDconst [y])) && is32Bit(x + y) => (MOVDconst [x + y])

// ANDI with all zeros, all ones or two constants.
(ANDI [0] x) => (MOVDconst [0])
(ANDI [-1] x) => x
(ANDI [x] (MOVDconst [y])) => (MOVDconst [x & y])

// ORI with all zeros, all ones or two constants.
(ORI [0] x) => x
(ORI [-1] x) => (MOVDconst [-1])
(ORI [x] (MOVDconst [y])) => (MOVDconst [x | y])

// Negation of a constant.
(NEG (MOVDconst [x])) => (MOVDconst [-x])
(NEGW (MOVDconst [x])) => (MOVDconst [int64(int32(-x))])

// Shift of a constant.
(SLLI [x] (MOVDconst [y])) && is32Bit(y << x) => (MOVDconst [y << x])
(SRLI [x] (MOVDconst [y])) => (MOVDconst [int64(uint64(y) >> x)])
(SRAI [x] (MOVDconst [y])) => (MOVDconst [int64(y) >> x])

// SLTI/SLTIU with constants.
(SLTI [x] (MOVDconst [y])) => (MOVDconst [b2i(int64(y) < int64(x))])
(SLTIU [x] (MOVDconst [y])) => (MOVDconst [b2i(uint64(y) < uint64(x))])

// Merge negation into fused multiply-add and multiply-subtract.
//
// Key:
//
//   [+ -](x * y) [+ -] z.
//      _ N          A S
//                    D U
//                    D B
//
// Note: multiplication commutativity handled by rule generator.
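//
// For reference, the fused ops follow the usual RISC-V semantics (sketch):
//
//	FMADDD  x y z = (x * y) + z
//	FMSUBD  x y z = (x * y) - z
//	FNMADDD x y z = -(x * y) - z
//	FNMSUBD x y z = -(x * y) + z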
(F(MADD|NMADD|MSUB|NMSUB)D neg:(FNEGD x) y z) && neg.Uses == 1 => (F(NMADD|MADD|NMSUB|MSUB)D x y z)
(F(MADD|NMADD|MSUB|NMSUB)D x y neg:(FNEGD z)) && neg.Uses == 1 => (F(MSUB|NMSUB|MADD|NMADD)D x y z)
