Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/logopt"
     9  	"cmd/compile/internal/types"
    10  	"cmd/internal/obj"
    11  	"cmd/internal/obj/s390x"
    12  	"cmd/internal/objabi"
    13  	"cmd/internal/src"
    14  	"encoding/binary"
    15  	"fmt"
    16  	"io"
    17  	"math"
    18  	"math/bits"
    19  	"os"
    20  	"path/filepath"
    21  )
    22  
    23  type deadValueChoice bool
    24  
    25  const (
    26  	leaveDeadValues  deadValueChoice = false
    27  	removeDeadValues                 = true
    28  )
    29  
    30  // applyRewrite repeatedly applies the block and value rewrite functions rb and rv to f. deadcode indicates whether rewrite should try to remove any values that become dead.
    31  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    32  	// repeat rewrites until we find no more rewrites
    33  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    34  	pendingLines.clear()
    35  	debug := f.pass.debug
    36  	if debug > 1 {
    37  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    38  	}
    39  	var iters int
    40  	var states map[string]bool
    41  	for {
    42  		change := false
    43  		deadChange := false
    44  		for _, b := range f.Blocks {
    45  			var b0 *Block
    46  			if debug > 1 {
    47  				b0 = new(Block)
    48  				*b0 = *b
    49  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    50  			}
    51  			for i, c := range b.ControlValues() {
    52  				for c.Op == OpCopy {
    53  					c = c.Args[0]
    54  					b.ReplaceControl(i, c)
    55  				}
    56  			}
    57  			if rb(b) {
    58  				change = true
    59  				if debug > 1 {
    60  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    61  				}
    62  			}
    63  			for j, v := range b.Values {
    64  				var v0 *Value
    65  				if debug > 1 {
    66  					v0 = new(Value)
    67  					*v0 = *v
    68  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    69  				}
    70  				if v.Uses == 0 && v.removeable() {
    71  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    72  						// Reset any value that is now unused, so that we decrement
    73  						// the use counts of all of its arguments.
    74  						// Not quite a deadcode pass, because it does not handle cycles.
    75  						// But it should help Uses==1 rules to fire.
    76  						v.reset(OpInvalid)
    77  						deadChange = true
    78  					}
    79  					// No point rewriting values which aren't used.
    80  					continue
    81  				}
    82  
    83  				vchange := phielimValue(v)
    84  				if vchange && debug > 1 {
    85  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    86  				}
    87  
    88  				// Eliminate copy inputs.
    89  				// If any copy input becomes unused, mark it
    90  				// as invalid and discard its argument. Repeat
    91  				// recursively on the discarded argument.
    92  				// This phase helps remove phantom "dead copy" uses
    93  				// of a value so that an x.Uses==1 rule condition
    94  				// fires reliably.
    95  				for i, a := range v.Args {
    96  					if a.Op != OpCopy {
    97  						continue
    98  					}
    99  					aa := copySource(a)
   100  					v.SetArg(i, aa)
   101  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   102  					// to hold it.  The first candidate is the value that will replace a (aa),
   103  					// if it shares the same block and line and is eligible.
   104  					// The second option is v, which has a as an input.  Because aa is earlier in
   105  					// the data flow, it is the better choice.
   106  					if a.Pos.IsStmt() == src.PosIsStmt {
   107  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   108  							aa.Pos = aa.Pos.WithIsStmt()
   109  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   110  							v.Pos = v.Pos.WithIsStmt()
   111  						} else {
   112  							// Record the lost line and look for a new home after all rewrites are complete.
   113  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   114  							// line to appear in more than one block, but only one block is stored, so if both end
   115  							// up here, then one will be lost.
   116  							pendingLines.set(a.Pos, int32(a.Block.ID))
   117  						}
   118  						a.Pos = a.Pos.WithNotStmt()
   119  					}
   120  					vchange = true
   121  					for a.Uses == 0 {
   122  						b := a.Args[0]
   123  						a.reset(OpInvalid)
   124  						a = b
   125  					}
   126  				}
   127  				if vchange && debug > 1 {
   128  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   129  				}
   130  
   131  				// apply rewrite function
   132  				if rv(v) {
   133  					vchange = true
   134  					// If value changed to a poor choice for a statement boundary, move the boundary
   135  					if v.Pos.IsStmt() == src.PosIsStmt {
   136  						if k := nextGoodStatementIndex(v, j, b); k != j {
   137  							v.Pos = v.Pos.WithNotStmt()
   138  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   139  						}
   140  					}
   141  				}
   142  
   143  				change = change || vchange
   144  				if vchange && debug > 1 {
   145  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   146  				}
   147  			}
   148  		}
   149  		if !change && !deadChange {
   150  			break
   151  		}
   152  		iters++
   153  		if (iters > 1000 || debug >= 2) && change {
   154  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   155  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   156  			// and the maximum value encountered during make.bash is 12.
   157  			// Start checking for cycles. (This is too expensive to do routinely.)
   158  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   159  			if states == nil {
   160  				states = make(map[string]bool)
   161  			}
   162  			h := f.rewriteHash()
   163  			if _, ok := states[h]; ok {
   164  				// We've found a cycle.
   165  				// To diagnose it, set debug to 2 and start again,
   166  				// so that we'll print all rules applied until we complete another cycle.
   167  				// If debug is already >= 2, we've already done that, so it's time to crash.
   168  				if debug < 2 {
   169  					debug = 2
   170  					states = make(map[string]bool)
   171  				} else {
   172  					f.Fatalf("rewrite cycle detected")
   173  				}
   174  			}
   175  			states[h] = true
   176  		}
   177  	}
   178  	// remove clobbered values
   179  	for _, b := range f.Blocks {
   180  		j := 0
   181  		for i, v := range b.Values {
   182  			vl := v.Pos
   183  			if v.Op == OpInvalid {
   184  				if v.Pos.IsStmt() == src.PosIsStmt {
   185  					pendingLines.set(vl, int32(b.ID))
   186  				}
   187  				f.freeValue(v)
   188  				continue
   189  			}
   190  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   191  				pendingLines.remove(vl)
   192  				v.Pos = v.Pos.WithIsStmt()
   193  			}
   194  			if i != j {
   195  				b.Values[j] = v
   196  			}
   197  			j++
   198  		}
   199  		if pendingLines.get(b.Pos) == int32(b.ID) {
   200  			b.Pos = b.Pos.WithIsStmt()
   201  			pendingLines.remove(b.Pos)
   202  		}
   203  		b.truncateValues(j)
   204  	}
   205  }
   206  
   207  // Common functions called from rewriting rules
   208  
   209  func is64BitFloat(t *types.Type) bool {
   210  	return t.Size() == 8 && t.IsFloat()
   211  }
   212  
   213  func is32BitFloat(t *types.Type) bool {
   214  	return t.Size() == 4 && t.IsFloat()
   215  }
   216  
   217  func is64BitInt(t *types.Type) bool {
   218  	return t.Size() == 8 && t.IsInteger()
   219  }
   220  
   221  func is32BitInt(t *types.Type) bool {
   222  	return t.Size() == 4 && t.IsInteger()
   223  }
   224  
   225  func is16BitInt(t *types.Type) bool {
   226  	return t.Size() == 2 && t.IsInteger()
   227  }
   228  
   229  func is8BitInt(t *types.Type) bool {
   230  	return t.Size() == 1 && t.IsInteger()
   231  }
   232  
   233  func isPtr(t *types.Type) bool {
   234  	return t.IsPtrShaped()
   235  }
   236  
   237  func isSigned(t *types.Type) bool {
   238  	return t.IsSigned()
   239  }
   240  
   241  // mergeSym merges two symbolic offsets. There is no real merging of
   242  // offsets; we just pick the non-nil one.
   243  func mergeSym(x, y Sym) Sym {
   244  	if x == nil {
   245  		return y
   246  	}
   247  	if y == nil {
   248  		return x
   249  	}
   250  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   251  }
   252  
   253  func canMergeSym(x, y Sym) bool {
   254  	return x == nil || y == nil
   255  }
   256  
   257  // canMergeLoadClobber reports whether the load can be merged into target without
   258  // invalidating the schedule.
   259  // It also checks that the other non-load argument x is something we
   260  // are ok with clobbering.
   261  func canMergeLoadClobber(target, load, x *Value) bool {
   262  	// The register containing x is going to get clobbered.
   263  	// Don't merge if we still need the value of x.
   264  	// We don't have liveness information here, but we can
   265  	// approximate x dying with:
   266  	//  1) target is x's only use.
   267  	//  2) target is not in a deeper loop than x.
   268  	if x.Uses != 1 {
   269  		return false
   270  	}
   271  	loopnest := x.Block.Func.loopnest()
   272  	loopnest.calculateDepths()
   273  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   274  		return false
   275  	}
   276  	return canMergeLoad(target, load)
   277  }
   278  
   279  // canMergeLoad reports whether the load can be merged into target without
   280  // invalidating the schedule.
   281  func canMergeLoad(target, load *Value) bool {
   282  	if target.Block.ID != load.Block.ID {
   283  		// If the load is in a different block do not merge it.
   284  		return false
   285  	}
   286  
   287  	// We can't merge the load into the target if the load
   288  	// has more than one use.
   289  	if load.Uses != 1 {
   290  		return false
   291  	}
   292  
   293  	mem := load.MemoryArg()
   294  
   295  	// We need the load's memory arg to still be alive at target. That
   296  	// can't be the case if one of target's args depends on a memory
   297  	// state that is a successor of load's memory arg.
   298  	//
   299  	// For example, it would be invalid to merge load into target in
   300  	// the following situation because newmem has killed oldmem
   301  	// before target is reached:
   302  	//     load = read ... oldmem
   303  	//   newmem = write ... oldmem
   304  	//     arg0 = read ... newmem
   305  	//   target = add arg0 load
   306  	//
   307  	// If the argument comes from a different block then we can exclude
   308  	// it immediately because it must dominate load (which is in the
   309  	// same block as target).
   310  	var args []*Value
   311  	for _, a := range target.Args {
   312  		if a != load && a.Block.ID == target.Block.ID {
   313  			args = append(args, a)
   314  		}
   315  	}
   316  
   317  	// memPreds contains memory states known to be predecessors of load's
   318  	// memory state. It is lazily initialized.
   319  	var memPreds map[*Value]bool
   320  	for i := 0; len(args) > 0; i++ {
   321  		const limit = 100
   322  		if i >= limit {
   323  			// Give up if we have done a lot of iterations.
   324  			return false
   325  		}
   326  		v := args[len(args)-1]
   327  		args = args[:len(args)-1]
   328  		if target.Block.ID != v.Block.ID {
   329  			// Since target and load are in the same block
   330  			// we can stop searching when we leave the block.
   331  			continue
   332  		}
   333  		if v.Op == OpPhi {
   334  			// A Phi implies we have reached the top of the block.
   335  			// The memory phi, if it exists, is always
   336  			// the first logical store in the block.
   337  			continue
   338  		}
   339  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   340  			// We could handle this situation; however, it is likely
   341  			// to be very rare.
   342  			return false
   343  		}
   344  		if v.Op.SymEffect()&SymAddr != 0 {
   345  			// This case prevents an operation that calculates the
   346  			// address of a local variable from being forced to schedule
   347  			// before its corresponding VarDef.
   348  			// See issue 28445.
   349  			//   v1 = LOAD ...
   350  			//   v2 = VARDEF
   351  			//   v3 = LEAQ
   352  			//   v4 = CMPQ v1 v3
   353  			// We don't want to combine the CMPQ with the load, because
   354  			// that would force the CMPQ to schedule before the VARDEF, which
   355  			// in turn requires the LEAQ to schedule before the VARDEF.
   356  			return false
   357  		}
   358  		if v.Type.IsMemory() {
   359  			if memPreds == nil {
   360  				// Initialise a map containing memory states
   361  				// known to be predecessors of load's memory
   362  				// state.
   363  				memPreds = make(map[*Value]bool)
   364  				m := mem
   365  				const limit = 50
   366  				for i := 0; i < limit; i++ {
   367  					if m.Op == OpPhi {
   368  						// The memory phi, if it exists, is always
   369  						// the first logical store in the block.
   370  						break
   371  					}
   372  					if m.Block.ID != target.Block.ID {
   373  						break
   374  					}
   375  					if !m.Type.IsMemory() {
   376  						break
   377  					}
   378  					memPreds[m] = true
   379  					if len(m.Args) == 0 {
   380  						break
   381  					}
   382  					m = m.MemoryArg()
   383  				}
   384  			}
   385  
   386  			// We can merge if v is a predecessor of mem.
   387  			//
   388  			// For example, we can merge load into target in the
   389  			// following scenario:
   390  			//      x = read ... v
   391  			//    mem = write ... v
   392  			//   load = read ... mem
   393  			// target = add x load
   394  			if memPreds[v] {
   395  				continue
   396  			}
   397  			return false
   398  		}
   399  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   400  			// If v takes mem as an input then we know mem
   401  			// is valid at this point.
   402  			continue
   403  		}
   404  		for _, a := range v.Args {
   405  			if target.Block.ID == a.Block.ID {
   406  				args = append(args, a)
   407  			}
   408  		}
   409  	}
   410  
   411  	return true
   412  }
   413  
   414  // isSameCall reports whether sym refers to a call to the function with the given name.
   415  func isSameCall(sym interface{}, name string) bool {
   416  	fn := sym.(*AuxCall).Fn
   417  	return fn != nil && fn.String() == name
   418  }
   419  
   420  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   421  func canLoadUnaligned(c *Config) bool {
   422  	return c.ctxt.Arch.Alignment == 1
   423  }
   424  
   425  // nlzX returns the number of leading zeros.
   426  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   427  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   428  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   429  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   430  
   431  // ntzX returns the number of trailing zeros.
   432  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   433  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   434  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   435  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   436  
   437  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   438  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   439  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   440  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   441  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   442  
   443  // nto returns the number of trailing ones.
   444  func nto(x int64) int64 {
   445  	return int64(ntz64(^x))
   446  }
   447  
   448  // logX returns the base-2 logarithm of n.
   449  // n must be a positive power of 2 (isPowerOfTwoX returns true).
   450  func log8(n int8) int64 {
   451  	return int64(bits.Len8(uint8(n))) - 1
   452  }
   453  func log16(n int16) int64 {
   454  	return int64(bits.Len16(uint16(n))) - 1
   455  }
   456  func log32(n int32) int64 {
   457  	return int64(bits.Len32(uint32(n))) - 1
   458  }
   459  func log64(n int64) int64 {
   460  	return int64(bits.Len64(uint64(n))) - 1
   461  }
   462  
   463  // log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
   464  // Rounds down.
   465  func log2uint32(n int64) int64 {
   466  	return int64(bits.Len32(uint32(n))) - 1
   467  }
   468  
   469  // isPowerOfTwo functions report whether n is a power of 2.
   470  func isPowerOfTwo8(n int8) bool {
   471  	return n > 0 && n&(n-1) == 0
   472  }
   473  func isPowerOfTwo16(n int16) bool {
   474  	return n > 0 && n&(n-1) == 0
   475  }
   476  func isPowerOfTwo32(n int32) bool {
   477  	return n > 0 && n&(n-1) == 0
   478  }
   479  func isPowerOfTwo64(n int64) bool {
   480  	return n > 0 && n&(n-1) == 0
   481  }
   482  
   483  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   484  func isUint64PowerOfTwo(in int64) bool {
   485  	n := uint64(in)
   486  	return n > 0 && n&(n-1) == 0
   487  }
   488  
   489  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   490  func isUint32PowerOfTwo(in int64) bool {
   491  	n := uint64(uint32(in))
   492  	return n > 0 && n&(n-1) == 0
   493  }
   494  
   495  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   496  func is32Bit(n int64) bool {
   497  	return n == int64(int32(n))
   498  }
   499  
   500  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   501  func is16Bit(n int64) bool {
   502  	return n == int64(int16(n))
   503  }
   504  
   505  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   506  func is8Bit(n int64) bool {
   507  	return n == int64(int8(n))
   508  }
   509  
   510  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   511  func isU8Bit(n int64) bool {
   512  	return n == int64(uint8(n))
   513  }
   514  
   515  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   516  func isU12Bit(n int64) bool {
   517  	return 0 <= n && n < (1<<12)
   518  }
   519  
   520  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   521  func isU16Bit(n int64) bool {
   522  	return n == int64(uint16(n))
   523  }
   524  
   525  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   526  func isU32Bit(n int64) bool {
   527  	return n == int64(uint32(n))
   528  }
   529  
   530  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   531  func is20Bit(n int64) bool {
   532  	return -(1<<19) <= n && n < (1<<19)
   533  }
   534  
   535  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   536  func b2i(b bool) int64 {
   537  	if b {
   538  		return 1
   539  	}
   540  	return 0
   541  }
   542  
   543  // b2i32 translates a boolean value to 0 or 1.
   544  func b2i32(b bool) int32 {
   545  	if b {
   546  		return 1
   547  	}
   548  	return 0
   549  }
   550  
   551  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   552  // A shift is bounded if it is shifting by less than the width of the shifted value.
   553  func shiftIsBounded(v *Value) bool {
   554  	return v.AuxInt != 0
   555  }
   556  
   557  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   558  // generated code as much as possible.
   559  func canonLessThan(x, y *Value) bool {
   560  	if x.Op != y.Op {
   561  		return x.Op < y.Op
   562  	}
   563  	if !x.Pos.SameFileAndLine(y.Pos) {
   564  		return x.Pos.Before(y.Pos)
   565  	}
   566  	return x.ID < y.ID
   567  }
   568  
   569  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   570  // of the mantissa. It will panic if the truncation results in lost information.
   571  func truncate64Fto32F(f float64) float32 {
   572  	if !isExactFloat32(f) {
   573  		panic("truncate64Fto32F: truncation is not exact")
   574  	}
   575  	if !math.IsNaN(f) {
   576  		return float32(f)
   577  	}
   578  	// NaN bit patterns aren't necessarily preserved across conversion
   579  	// instructions so we need to do the conversion manually.
   580  	b := math.Float64bits(f)
   581  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   582  	//          | sign                  | exponent   | mantissa       |
   583  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   584  	return math.Float32frombits(r)
   585  }
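
        // Worked example for the NaN path above: for the canonical quiet NaN with
        // float64 bits 0x7ff8000000000000, m = 0x0008000000000000, the sign bit is 0,
        // and r = 0x7f800000 | m>>29 = 0x7fc00000, the canonical float32 quiet NaN,
        // so the top mantissa bit survives the truncation.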
   586  
   587  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   588  // pattern of the mantissa.
   589  func extend32Fto64F(f float32) float64 {
   590  	if !math.IsNaN(float64(f)) {
   591  		return float64(f)
   592  	}
   593  	// NaN bit patterns aren't necessarily preserved across conversion
   594  	// instructions so we need to do the conversion manually.
   595  	b := uint64(math.Float32bits(f))
   596  	//   | sign                  | exponent      | mantissa                    |
   597  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   598  	return math.Float64frombits(r)
   599  }
   600  
   601  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   602  func DivisionNeedsFixUp(v *Value) bool {
   603  	return v.AuxInt == 0
   604  }
   605  
   606  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   607  func auxFrom64F(f float64) int64 {
   608  	if f != f {
   609  		panic("can't encode a NaN in AuxInt field")
   610  	}
   611  	return int64(math.Float64bits(f))
   612  }
   613  
   614  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   615  func auxFrom32F(f float32) int64 {
   616  	if f != f {
   617  		panic("can't encode a NaN in AuxInt field")
   618  	}
   619  	return int64(math.Float64bits(extend32Fto64F(f)))
   620  }
   621  
   622  // auxTo32F decodes a float32 from the AuxInt value provided.
   623  func auxTo32F(i int64) float32 {
   624  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   625  }
   626  
   627  // auxTo64F decodes a float64 from the AuxInt value provided.
   628  func auxTo64F(i int64) float64 {
   629  	return math.Float64frombits(uint64(i))
   630  }
   631  
   632  func auxIntToBool(i int64) bool {
   633  	if i == 0 {
   634  		return false
   635  	}
   636  	return true
   637  }
   638  func auxIntToInt8(i int64) int8 {
   639  	return int8(i)
   640  }
   641  func auxIntToInt16(i int64) int16 {
   642  	return int16(i)
   643  }
   644  func auxIntToInt32(i int64) int32 {
   645  	return int32(i)
   646  }
   647  func auxIntToInt64(i int64) int64 {
   648  	return i
   649  }
   650  func auxIntToUint8(i int64) uint8 {
   651  	return uint8(i)
   652  }
   653  func auxIntToFloat32(i int64) float32 {
   654  	return float32(math.Float64frombits(uint64(i)))
   655  }
   656  func auxIntToFloat64(i int64) float64 {
   657  	return math.Float64frombits(uint64(i))
   658  }
   659  func auxIntToValAndOff(i int64) ValAndOff {
   660  	return ValAndOff(i)
   661  }
   662  func auxIntToArm64BitField(i int64) arm64BitField {
   663  	return arm64BitField(i)
   664  }
   665  func auxIntToInt128(x int64) int128 {
   666  	if x != 0 {
   667  		panic("nonzero int128 not allowed")
   668  	}
   669  	return 0
   670  }
   671  func auxIntToFlagConstant(x int64) flagConstant {
   672  	return flagConstant(x)
   673  }
   674  
   675  func auxIntToOp(cc int64) Op {
   676  	return Op(cc)
   677  }
   678  
   679  func boolToAuxInt(b bool) int64 {
   680  	if b {
   681  		return 1
   682  	}
   683  	return 0
   684  }
   685  func int8ToAuxInt(i int8) int64 {
   686  	return int64(i)
   687  }
   688  func int16ToAuxInt(i int16) int64 {
   689  	return int64(i)
   690  }
   691  func int32ToAuxInt(i int32) int64 {
   692  	return int64(i)
   693  }
   694  func int64ToAuxInt(i int64) int64 {
   695  	return int64(i)
   696  }
   697  func uint8ToAuxInt(i uint8) int64 {
   698  	return int64(int8(i))
   699  }
   700  func float32ToAuxInt(f float32) int64 {
   701  	return int64(math.Float64bits(float64(f)))
   702  }
   703  func float64ToAuxInt(f float64) int64 {
   704  	return int64(math.Float64bits(f))
   705  }
   706  func valAndOffToAuxInt(v ValAndOff) int64 {
   707  	return int64(v)
   708  }
   709  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   710  	return int64(v)
   711  }
   712  func int128ToAuxInt(x int128) int64 {
   713  	if x != 0 {
   714  		panic("nonzero int128 not allowed")
   715  	}
   716  	return 0
   717  }
   718  func flagConstantToAuxInt(x flagConstant) int64 {
   719  	return int64(x)
   720  }
   721  
   722  func opToAuxInt(o Op) int64 {
   723  	return int64(o)
   724  }
   725  
   726  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   727  type Aux interface {
   728  	CanBeAnSSAAux()
   729  }
   730  
   731  // stringAux wraps string values for use in Aux.
   732  type stringAux string
   733  
   734  func (stringAux) CanBeAnSSAAux() {}
   735  
   736  func auxToString(i Aux) string {
   737  	return string(i.(stringAux))
   738  }
   739  func auxToSym(i Aux) Sym {
   740  	// TODO: kind of a hack - allows nil interface through
   741  	s, _ := i.(Sym)
   742  	return s
   743  }
   744  func auxToType(i Aux) *types.Type {
   745  	return i.(*types.Type)
   746  }
   747  func auxToCall(i Aux) *AuxCall {
   748  	return i.(*AuxCall)
   749  }
   750  func auxToS390xCCMask(i Aux) s390x.CCMask {
   751  	return i.(s390x.CCMask)
   752  }
   753  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   754  	return i.(s390x.RotateParams)
   755  }
   756  
   757  func StringToAux(s string) Aux {
   758  	return stringAux(s)
   759  }
   760  func symToAux(s Sym) Aux {
   761  	return s
   762  }
   763  func callToAux(s *AuxCall) Aux {
   764  	return s
   765  }
   766  func typeToAux(t *types.Type) Aux {
   767  	return t
   768  }
   769  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   770  	return c
   771  }
   772  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   773  	return r
   774  }
   775  
   776  // uaddOvf reports whether unsigned a+b would overflow.
   777  func uaddOvf(a, b int64) bool {
   778  	return uint64(a)+uint64(b) < uint64(a)
   779  }
   780  
   781  // loadLSymOffset simulates reading a word at an offset into a
   782  // read-only symbol's runtime memory. If it would read a pointer to
   783  // another symbol, that symbol is returned. Otherwise, it returns nil.
   784  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   785  	if lsym.Type != objabi.SRODATA {
   786  		return nil
   787  	}
   788  
   789  	for _, r := range lsym.R {
   790  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   791  			return r.Sym
   792  		}
   793  	}
   794  
   795  	return nil
   796  }
   797  
   798  // devirtLESym de-virtualizes an InterLECall.
   799  // 'sym' is the symbol for the itab.
   800  func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
   801  	n, ok := sym.(*obj.LSym)
   802  	if !ok {
   803  		return nil
   804  	}
   805  
   806  	lsym := loadLSymOffset(n, offset)
   807  	if f := v.Block.Func; f.pass.debug > 0 {
   808  		if lsym != nil {
   809  			f.Warnl(v.Pos, "de-virtualizing call")
   810  		} else {
   811  			f.Warnl(v.Pos, "couldn't de-virtualize call")
   812  		}
   813  	}
   814  	return lsym
   815  }
   816  
   817  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   818  	v.Op = OpStaticLECall
   819  	auxcall := v.Aux.(*AuxCall)
   820  	auxcall.Fn = sym
   821  	// Remove first arg
   822  	v.Args[0].Uses--
   823  	copy(v.Args[0:], v.Args[1:])
   824  	v.Args[len(v.Args)-1] = nil // aid GC
   825  	v.Args = v.Args[:len(v.Args)-1]
   826  	return v
   827  }
   828  
   829  // isSamePtr reports whether p1 and p2 point to the same address.
   830  func isSamePtr(p1, p2 *Value) bool {
   831  	if p1 == p2 {
   832  		return true
   833  	}
   834  	if p1.Op != p2.Op {
   835  		return false
   836  	}
   837  	switch p1.Op {
   838  	case OpOffPtr:
   839  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   840  	case OpAddr, OpLocalAddr:
   841  		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
   842  		// Checking for value equality only works after [z]cse has run.
   843  		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
   844  	case OpAddPtr:
   845  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   846  	}
   847  	return false
   848  }
   849  
   850  func isStackPtr(v *Value) bool {
   851  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   852  		v = v.Args[0]
   853  	}
   854  	return v.Op == OpSP || v.Op == OpLocalAddr
   855  }
   856  
   857  // disjoint reports whether the memory region specified by [p1:p1+n1)
   858  // does not overlap with [p2:p2+n2).
   859  // A return value of false does not imply the regions overlap.
   860  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   861  	if n1 == 0 || n2 == 0 {
   862  		return true
   863  	}
   864  	if p1 == p2 {
   865  		return false
   866  	}
   867  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   868  		base, offset = ptr, 0
   869  		for base.Op == OpOffPtr {
   870  			offset += base.AuxInt
   871  			base = base.Args[0]
   872  		}
   873  		return base, offset
   874  	}
   875  	p1, off1 := baseAndOffset(p1)
   876  	p2, off2 := baseAndOffset(p2)
   877  	if isSamePtr(p1, p2) {
   878  		return !overlap(off1, n1, off2, n2)
   879  	}
   880  	// p1 and p2 are not the same, so if they are both OpAddrs then
   881  	// they point to different variables.
   882  	// If one pointer is on the stack and the other is an argument
   883  	// then they can't overlap.
   884  	switch p1.Op {
   885  	case OpAddr, OpLocalAddr:
   886  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   887  			return true
   888  		}
   889  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   890  	case OpArg, OpArgIntReg:
   891  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   892  			return true
   893  		}
   894  	case OpSP:
   895  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   896  	}
   897  	return false
   898  }
   899  
   900  // moveSize returns the number of bytes an aligned MOV instruction moves.
   901  func moveSize(align int64, c *Config) int64 {
   902  	switch {
   903  	case align%8 == 0 && c.PtrSize == 8:
   904  		return 8
   905  	case align%4 == 0:
   906  		return 4
   907  	case align%2 == 0:
   908  		return 2
   909  	}
   910  	return 1
   911  }
   912  
   913  // mergePoint finds a block among a's blocks which dominates b and is itself
   914  // dominated by all of a's blocks. Returns nil if it can't find one.
   915  // Might return nil even if one does exist.
   916  func mergePoint(b *Block, a ...*Value) *Block {
   917  	// Walk backward from b looking for one of the a's blocks.
   918  
   919  	// Max distance
   920  	d := 100
   921  
   922  	for d > 0 {
   923  		for _, x := range a {
   924  			if b == x.Block {
   925  				goto found
   926  			}
   927  		}
   928  		if len(b.Preds) > 1 {
   929  			// Don't know which way to go back. Abort.
   930  			return nil
   931  		}
   932  		b = b.Preds[0].b
   933  		d--
   934  	}
   935  	return nil // too far away
   936  found:
   937  	// At this point, r is the first value in a that we find by walking backwards.
   938  	// If we return anything, r will be it.
   939  	r := b
   940  
   941  	// Keep going, counting the other a's that we find. They must all dominate r.
   942  	na := 0
   943  	for d > 0 {
   944  		for _, x := range a {
   945  			if b == x.Block {
   946  				na++
   947  			}
   948  		}
   949  		if na == len(a) {
   950  			// Found all of a in a backwards walk. We can return r.
   951  			return r
   952  		}
   953  		if len(b.Preds) > 1 {
   954  			return nil
   955  		}
   956  		b = b.Preds[0].b
   957  		d--
   958  
   959  	}
   960  	return nil // too far away
   961  }
   962  
   963  // clobber invalidates values. Returns true.
   964  // clobber is used by rewrite rules to:
   965  //   A) make sure the values are really dead and never used again.
   966  //   B) decrement use counts of the values' args.
   967  func clobber(vv ...*Value) bool {
   968  	for _, v := range vv {
   969  		v.reset(OpInvalid)
   970  		// Note: leave v.Block intact.  The Block field is used after clobber.
   971  	}
   972  	return true
   973  }
   974  
   975  // clobberIfDead resets v when use count is 1. Returns true.
   976  // clobberIfDead is used by rewrite rules to decrement
   977  // use counts of v's args when v is dead and never used.
   978  func clobberIfDead(v *Value) bool {
   979  	if v.Uses == 1 {
   980  		v.reset(OpInvalid)
   981  	}
   982  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   983  	return true
   984  }
   985  
   986  // noteRule is an easy way to track if a rule is matched when writing
   987  // new ones.  Make the rule of interest also conditional on
   988  //     noteRule("note to self: rule of interest matched")
   989  // and that message will print when the rule matches.
   990  func noteRule(s string) bool {
   991  	fmt.Println(s)
   992  	return true
   993  }
   994  
   995  // countRule increments Func.ruleMatches[key].
   996  // If Func.ruleMatches is non-nil at the end
   997  // of compilation, it will be printed to stdout.
   998  // This is intended to make it easier to find which functions
   999  // contain lots of rule matches when developing new rules.
  1000  func countRule(v *Value, key string) bool {
  1001  	f := v.Block.Func
  1002  	if f.ruleMatches == nil {
  1003  		f.ruleMatches = make(map[string]int)
  1004  	}
  1005  	f.ruleMatches[key]++
  1006  	return true
  1007  }
  1008  
  1009  // warnRule generates compiler debug output with string s when
  1010  // v is not in autogenerated code, cond is true and the rule has fired.
  1011  func warnRule(cond bool, v *Value, s string) bool {
  1012  	if pos := v.Pos; pos.Line() > 1 && cond {
  1013  		v.Block.Func.Warnl(pos, s)
  1014  	}
  1015  	return true
  1016  }
  1017  
  1018  // for a pseudo-op like (LessThan x), extract x
  1019  func flagArg(v *Value) *Value {
  1020  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1021  		return nil
  1022  	}
  1023  	return v.Args[0]
  1024  }
  1025  
  1026  // arm64Negate finds the complement to an ARM64 condition code,
  1027  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1028  //
  1029  // For floating point, it's more subtle because NaN is unordered. We do
  1030  // !LessThanF -> NotLessThanF; the latter takes care of NaNs.
  1031  func arm64Negate(op Op) Op {
  1032  	switch op {
  1033  	case OpARM64LessThan:
  1034  		return OpARM64GreaterEqual
  1035  	case OpARM64LessThanU:
  1036  		return OpARM64GreaterEqualU
  1037  	case OpARM64GreaterThan:
  1038  		return OpARM64LessEqual
  1039  	case OpARM64GreaterThanU:
  1040  		return OpARM64LessEqualU
  1041  	case OpARM64LessEqual:
  1042  		return OpARM64GreaterThan
  1043  	case OpARM64LessEqualU:
  1044  		return OpARM64GreaterThanU
  1045  	case OpARM64GreaterEqual:
  1046  		return OpARM64LessThan
  1047  	case OpARM64GreaterEqualU:
  1048  		return OpARM64LessThanU
  1049  	case OpARM64Equal:
  1050  		return OpARM64NotEqual
  1051  	case OpARM64NotEqual:
  1052  		return OpARM64Equal
  1053  	case OpARM64LessThanF:
  1054  		return OpARM64NotLessThanF
  1055  	case OpARM64NotLessThanF:
  1056  		return OpARM64LessThanF
  1057  	case OpARM64LessEqualF:
  1058  		return OpARM64NotLessEqualF
  1059  	case OpARM64NotLessEqualF:
  1060  		return OpARM64LessEqualF
  1061  	case OpARM64GreaterThanF:
  1062  		return OpARM64NotGreaterThanF
  1063  	case OpARM64NotGreaterThanF:
  1064  		return OpARM64GreaterThanF
  1065  	case OpARM64GreaterEqualF:
  1066  		return OpARM64NotGreaterEqualF
  1067  	case OpARM64NotGreaterEqualF:
  1068  		return OpARM64GreaterEqualF
  1069  	default:
  1070  		panic("unreachable")
  1071  	}
  1072  }
  1073  
  1074  // arm64Invert evaluates (InvertFlags op), which
  1075  // is the same as altering the condition codes such
  1076  // that the same result would be produced if the arguments
  1077  // to the flag-generating instruction were reversed, e.g.
  1078  // (InvertFlags (CMP x y)) -> (CMP y x)
  1079  func arm64Invert(op Op) Op {
  1080  	switch op {
  1081  	case OpARM64LessThan:
  1082  		return OpARM64GreaterThan
  1083  	case OpARM64LessThanU:
  1084  		return OpARM64GreaterThanU
  1085  	case OpARM64GreaterThan:
  1086  		return OpARM64LessThan
  1087  	case OpARM64GreaterThanU:
  1088  		return OpARM64LessThanU
  1089  	case OpARM64LessEqual:
  1090  		return OpARM64GreaterEqual
  1091  	case OpARM64LessEqualU:
  1092  		return OpARM64GreaterEqualU
  1093  	case OpARM64GreaterEqual:
  1094  		return OpARM64LessEqual
  1095  	case OpARM64GreaterEqualU:
  1096  		return OpARM64LessEqualU
  1097  	case OpARM64Equal, OpARM64NotEqual:
  1098  		return op
  1099  	case OpARM64LessThanF:
  1100  		return OpARM64GreaterThanF
  1101  	case OpARM64GreaterThanF:
  1102  		return OpARM64LessThanF
  1103  	case OpARM64LessEqualF:
  1104  		return OpARM64GreaterEqualF
  1105  	case OpARM64GreaterEqualF:
  1106  		return OpARM64LessEqualF
  1107  	case OpARM64NotLessThanF:
  1108  		return OpARM64NotGreaterThanF
  1109  	case OpARM64NotGreaterThanF:
  1110  		return OpARM64NotLessThanF
  1111  	case OpARM64NotLessEqualF:
  1112  		return OpARM64NotGreaterEqualF
  1113  	case OpARM64NotGreaterEqualF:
  1114  		return OpARM64NotLessEqualF
  1115  	default:
  1116  		panic("unreachable")
  1117  	}
  1118  }
  1119  
  1120  // evaluate an ARM64 op against a flags value
  1121  // that is potentially constant; return 1 for true,
  1122  // -1 for false, and 0 for not constant.
  1123  func ccARM64Eval(op Op, flags *Value) int {
  1124  	fop := flags.Op
  1125  	if fop == OpARM64InvertFlags {
  1126  		return -ccARM64Eval(op, flags.Args[0])
  1127  	}
  1128  	if fop != OpARM64FlagConstant {
  1129  		return 0
  1130  	}
  1131  	fc := flagConstant(flags.AuxInt)
  1132  	b2i := func(b bool) int {
  1133  		if b {
  1134  			return 1
  1135  		}
  1136  		return -1
  1137  	}
  1138  	switch op {
  1139  	case OpARM64Equal:
  1140  		return b2i(fc.eq())
  1141  	case OpARM64NotEqual:
  1142  		return b2i(fc.ne())
  1143  	case OpARM64LessThan:
  1144  		return b2i(fc.lt())
  1145  	case OpARM64LessThanU:
  1146  		return b2i(fc.ult())
  1147  	case OpARM64GreaterThan:
  1148  		return b2i(fc.gt())
  1149  	case OpARM64GreaterThanU:
  1150  		return b2i(fc.ugt())
  1151  	case OpARM64LessEqual:
  1152  		return b2i(fc.le())
  1153  	case OpARM64LessEqualU:
  1154  		return b2i(fc.ule())
  1155  	case OpARM64GreaterEqual:
  1156  		return b2i(fc.ge())
  1157  	case OpARM64GreaterEqualU:
  1158  		return b2i(fc.uge())
  1159  	}
  1160  	return 0
  1161  }
  1162  
  1163  // logRule logs the use of the rule s. This will only be enabled if
  1164  // rewrite rules were generated with the -log option, see gen/rulegen.go.
  1165  func logRule(s string) {
  1166  	if ruleFile == nil {
  1167  		// Open a log file to write log to. We open in append
  1168  		// mode because all.bash runs the compiler lots of times,
  1169  		// and we want the concatenation of all of those logs.
  1170  		// This means, of course, that users need to rm the old log
  1171  		// to get fresh data.
  1172  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1173  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1174  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1175  		if err != nil {
  1176  			panic(err)
  1177  		}
  1178  		ruleFile = w
  1179  	}
  1180  	_, err := fmt.Fprintln(ruleFile, s)
  1181  	if err != nil {
  1182  		panic(err)
  1183  	}
  1184  }
  1185  
  1186  var ruleFile io.Writer
  1187  
  1188  func min(x, y int64) int64 {
  1189  	if x < y {
  1190  		return x
  1191  	}
  1192  	return y
  1193  }
  1194  
  1195  func isConstZero(v *Value) bool {
  1196  	switch v.Op {
  1197  	case OpConstNil:
  1198  		return true
  1199  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1200  		return v.AuxInt == 0
  1201  	}
  1202  	return false
  1203  }
  1204  
  1205  // reciprocalExact64 reports whether 1/c is exactly representable.
  1206  func reciprocalExact64(c float64) bool {
  1207  	b := math.Float64bits(c)
  1208  	man := b & (1<<52 - 1)
  1209  	if man != 0 {
  1210  		return false // not a power of 2, denormal, or NaN
  1211  	}
  1212  	exp := b >> 52 & (1<<11 - 1)
  1213  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1214  	// changes the exponent to 0x7fe-exp.
  1215  	switch exp {
  1216  	case 0:
  1217  		return false // ±0
  1218  	case 0x7ff:
  1219  		return false // ±inf
  1220  	case 0x7fe:
  1221  		return false // exponent is not representable
  1222  	default:
  1223  		return true
  1224  	}
  1225  }
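
        // Examples: c = 2.0 has bits 0x4000000000000000 (mantissa 0, exp 0x400), so the
        // result is true and 1/c = 0.5 is exact; c = 3.0 has a nonzero mantissa, so the
        // result is false (1/3 is not exactly representable).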
  1226  
  1227  // reciprocalExact32 reports whether 1/c is exactly representable.
  1228  func reciprocalExact32(c float32) bool {
  1229  	b := math.Float32bits(c)
  1230  	man := b & (1<<23 - 1)
  1231  	if man != 0 {
  1232  		return false // not a power of 2, denormal, or NaN
  1233  	}
  1234  	exp := b >> 23 & (1<<8 - 1)
  1235  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1236  	// changes the exponent to 0xfe-exp.
  1237  	switch exp {
  1238  	case 0:
  1239  		return false // ±0
  1240  	case 0xff:
  1241  		return false // ±inf
  1242  	case 0xfe:
  1243  		return false // exponent is not representable
  1244  	default:
  1245  		return true
  1246  	}
  1247  }
  1248  
  1249  // isARMImmRot checks whether an immediate can be directly encoded into an ARM instruction.
  1250  func isARMImmRot(v uint32) bool {
  1251  	for i := 0; i < 16; i++ {
  1252  		if v&^0xff == 0 {
  1253  			return true
  1254  		}
  1255  		v = v<<2 | v>>30
  1256  	}
  1257  
  1258  	return false
  1259  }
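
        // Examples: 0xff000000 is encodable, since rotating it left by 8 (four
        // iterations of the loop above) yields 0x000000ff, which fits in the low
        // 8 bits. 0x00000101 is not encodable: its two set bits are 8 positions
        // apart and can never both land within the low 8 bits.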
  1260  
  1261  // overlap reports whether the ranges specified by the given offset and
  1262  // size pairs overlap.
  1263  func overlap(offset1, size1, offset2, size2 int64) bool {
  1264  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1265  		return true
  1266  	}
  1267  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1268  		return true
  1269  	}
  1270  	return false
  1271  }
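
        // Examples: overlap(0, 8, 4, 4) is true ([0,8) and [4,8) share bytes 4..7),
        // while overlap(0, 4, 4, 4) is false (adjacent regions do not overlap).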
  1272  
  1273  func areAdjacentOffsets(off1, off2, size int64) bool {
  1274  	return off1+size == off2 || off1 == off2+size
  1275  }
  1276  
  1277  // zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
  1278  // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
  1279  // because it catches the same number of cases as 4.
  1280  func zeroUpper32Bits(x *Value, depth int) bool {
  1281  	switch x.Op {
  1282  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1283  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1284  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1285  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1286  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1287  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1288  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1289  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1290  		OpAMD64SHLL, OpAMD64SHLLconst:
  1291  		return true
  1292  	case OpArg:
  1293  		return x.Type.Size() == 4
  1294  	case OpPhi, OpSelect0, OpSelect1:
  1295  		// Phis can use each other as arguments; instead of tracking visited values,
  1296  		// just limit the recursion depth.
  1297  		if depth <= 0 {
  1298  			return false
  1299  		}
  1300  		for i := range x.Args {
  1301  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1302  				return false
  1303  			}
  1304  		}
  1305  		return true
  1306  
  1307  	}
  1308  	return false
  1309  }
  1310  
  1311  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
  1312  func zeroUpper48Bits(x *Value, depth int) bool {
  1313  	switch x.Op {
  1314  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1315  		return true
  1316  	case OpArg:
  1317  		return x.Type.Size() == 2
  1318  	case OpPhi, OpSelect0, OpSelect1:
  1319  		// Phis can use each other as arguments; instead of tracking visited values,
  1320  		// just limit the recursion depth.
  1321  		if depth <= 0 {
  1322  			return false
  1323  		}
  1324  		for i := range x.Args {
  1325  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1326  				return false
  1327  			}
  1328  		}
  1329  		return true
  1330  
  1331  	}
  1332  	return false
  1333  }
  1334  
  1335  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
  1336  func zeroUpper56Bits(x *Value, depth int) bool {
  1337  	switch x.Op {
  1338  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1339  		return true
  1340  	case OpArg:
  1341  		return x.Type.Size() == 1
  1342  	case OpPhi, OpSelect0, OpSelect1:
  1343  		// Phis can use each other as arguments; instead of tracking visited values,
  1344  		// just limit the recursion depth.
  1345  		if depth <= 0 {
  1346  			return false
  1347  		}
  1348  		for i := range x.Args {
  1349  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1350  				return false
  1351  			}
  1352  		}
  1353  		return true
  1354  
  1355  	}
  1356  	return false
  1357  }
  1358  
  1359  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1360  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1361  // safe, either because Move is small or because the arguments are disjoint.
  1362  // This is used as a check for replacing memmove with Move ops.
  1363  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1364  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1365  	// Move ops may or may not be faster for large sizes depending on how the platform
  1366  	// lowers them, so we only perform this optimization on platforms that we know to
  1367  	// have fast Move ops.
  1368  	switch c.arch {
  1369  	case "amd64":
  1370  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1371  	case "386", "arm64":
  1372  		return sz <= 8
  1373  	case "s390x", "ppc64", "ppc64le":
  1374  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1375  	case "arm", "mips", "mips64", "mipsle", "mips64le":
  1376  		return sz <= 4
  1377  	}
  1378  	return false
  1379  }
  1380  
  1381  // logLargeCopy logs the occurrence of a large copy.
  1382  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1383  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1384  func logLargeCopy(v *Value, s int64) bool {
  1385  	if s < 128 {
  1386  		return true
  1387  	}
  1388  	if logopt.Enabled() {
  1389  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1390  	}
  1391  	return true
  1392  }
  1393  
  1394  // hasSmallRotate reports whether the architecture has rotate instructions
  1395  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1396  func hasSmallRotate(c *Config) bool {
  1397  	switch c.arch {
  1398  	case "amd64", "386":
  1399  		return true
  1400  	default:
  1401  		return false
  1402  	}
  1403  }
  1404  
  1405  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1406  	if sh < 0 || sh >= sz {
  1407  		panic("PPC64 shift arg sh out of range")
  1408  	}
  1409  	if mb < 0 || mb >= sz {
  1410  		panic("PPC64 shift arg mb out of range")
  1411  	}
  1412  	if me < 0 || me >= sz {
  1413  		panic("PPC64 shift arg me out of range")
  1414  	}
  1415  	return int32(sh<<16 | mb<<8 | me)
  1416  }
  1417  
  1418  func GetPPC64Shiftsh(auxint int64) int64 {
  1419  	return int64(int8(auxint >> 16))
  1420  }
  1421  
  1422  func GetPPC64Shiftmb(auxint int64) int64 {
  1423  	return int64(int8(auxint >> 8))
  1424  }
  1425  
  1426  func GetPPC64Shiftme(auxint int64) int64 {
  1427  	return int64(int8(auxint))
  1428  }
  1429  
  1430  // isPPC64WordRotateMask reports whether this value can be encoded as a mask for
  1431  // an rlwinm-like operation. Masks can also extend from the msb and wrap to
  1432  // the lsb. That is, the valid masks are 32-bit strings
  1433  // of the form: 0..01..10..0, 1..10..01..1, or 1...1.
  1434  func isPPC64WordRotateMask(v64 int64) bool {
  1435  	// Isolate rightmost 1 (if none 0) and add.
  1436  	v := uint32(v64)
  1437  	vp := (v & -v) + v
  1438  	// Likewise, for the wrapping case.
  1439  	vn := ^v
  1440  	vpn := (vn & -vn) + vn
  1441  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1442  }
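
        // Examples: v = 0x00000ff0 gives vp = 0x1000 and v&vp = 0, so it is a valid
        // mask. v = 0xff0000ff fails the first test, but the wrapping test on
        // vn = 0x00ffff00 gives vpn = 0x01000000 and vn&vpn = 0, so it is also valid.
        // v = 0x0ff00ff0 (two separate runs of ones) fails both tests.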
  1443  
  1444  // encodePPC64RotateMask compresses the mask and shift into a single value of the form
  1445  // me | mb<<8 | rotate<<16 | nbits<<24, where me and mb can
  1446  // be used to regenerate the input mask.
  1447  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1448  	var mb, me, mbn, men int
  1449  
  1450  	// Determine boundaries and then decode them
  1451  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1452  		panic("Invalid PPC64 rotate mask")
  1453  	} else if nbits == 32 {
  1454  		mb = bits.LeadingZeros32(uint32(mask))
  1455  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1456  		mbn = bits.LeadingZeros32(^uint32(mask))
  1457  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1458  	} else {
  1459  		mb = bits.LeadingZeros64(uint64(mask))
  1460  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1461  		mbn = bits.LeadingZeros64(^uint64(mask))
  1462  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1463  	}
  1464  	// Check for a wrapping mask (e.g. bits at 0 and 63)
  1465  	if mb == 0 && me == int(nbits) {
  1466  		// swap the inverted values
  1467  		mb, me = men, mbn
  1468  	}
  1469  
  1470  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1471  }
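
        // Worked example: encodePPC64RotateMask(4, 0x00000ff0, 32) computes
        // mb = 20 and me = 28, giving 28 | 20<<8 | 4<<16 | 32<<24 = 0x2004141c.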
  1472  
  1473  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values
  1474  // returned as mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1475  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1476  	auxint := uint64(sauxint)
  1477  	rotate = int64((auxint >> 16) & 0xFF)
  1478  	mb = int64((auxint >> 8) & 0xFF)
  1479  	me = int64((auxint >> 0) & 0xFF)
  1480  	nbits := int64((auxint >> 24) & 0xFF)
  1481  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1482  	if mb > me {
  1483  		mask = ^mask
  1484  	}
  1485  	if nbits == 32 {
  1486  		mask = uint64(uint32(mask))
  1487  	}
  1488  
  1489  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1490  	// is inclusive.
  1491  	me = (me - 1) & (nbits - 1)
  1492  	return
  1493  }
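
        // Worked example: decoding 0x2004141c (from the encoding example above) yields
        // rotate = 4, mb = 20, nbits = 32, mask = 0xfff ^ 0xf = 0x00000ff0, and the
        // me field 28 is fixed up to the inclusive ISA value 27, i.e. MASK(20,27).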
  1494  
  1495  // isPPC64ValidShiftMask reports whether the mask is a set of
  1496  // consecutive bits including the least
  1497  // significant bit.
  1498  func isPPC64ValidShiftMask(v int64) bool {
  1499  	if (v != 0) && ((v+1)&v) == 0 {
  1500  		return true
  1501  	}
  1502  	return false
  1503  }
  1504  
  1505  func getPPC64ShiftMaskLength(v int64) int64 {
  1506  	return int64(bits.Len64(uint64(v)))
  1507  }
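
        // Examples: v = 0b111 gives (v+1)&v = 0, so it is a valid shift mask of
        // length 3; v = 0b110 gives (v+1)&v = 6, so it is rejected because the run
        // of ones does not include the least significant bit.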
  1508  
  1509  // Decompose a shift right into an equivalent rotate/mask,
  1510  // and return mask & m.
  1511  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1512  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1513  	return m & int64(smask)
  1514  }
  1515  
  1516  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1517  func mergePPC64AndSrwi(m, s int64) int64 {
  1518  	mask := mergePPC64RShiftMask(m, s, 32)
  1519  	if !isPPC64WordRotateMask(mask) {
  1520  		return 0
  1521  	}
  1522  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1523  }
  1524  
  1525  // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1526  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1527  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1528  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1529  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1530  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1531  
  1532  	// Rewrite mask to apply after the final left shift.
  1533  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1534  
  1535  	r_1 := 32 - srw
  1536  	r_2 := GetPPC64Shiftsh(sld)
  1537  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1538  
  1539  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1540  		return 0
  1541  	}
  1542  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1543  }
  1544  
  1545  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1546  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1547  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1548  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1549  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1550  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1551  
  1552  	// combine the masks, and adjust for the final left shift.
  1553  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1554  	r_2 := GetPPC64Shiftsh(int64(sld))
  1555  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1556  
  1557  	// Verify the result is still a valid bitmask of <= 32 bits.
  1558  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1559  		return 0
  1560  	}
  1561  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1562  }
  1563  
  1564  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1565  // or return 0 if they cannot be combined.
  1566  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1567  	if sld > srw || srw >= 32 {
  1568  		return 0
  1569  	}
  1570  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1571  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1572  	mask := (mask_r & mask_l) << uint(sld)
  1573  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1574  }
  1575  
  1576  // Convenience function to rotate a 32 bit constant value by another constant.
  1577  func rotateLeft32(v, rotate int64) int64 {
  1578  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1579  }
  1580  
  1581  func rotateRight64(v, rotate int64) int64 {
  1582  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1583  }

// armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || lsb+width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

// getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}
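
// Illustrative only: armBFAuxInt(8, 16) packs lsb=8 and width=16 as 8<<8 | 16
// (0x810), so getARM64BFlsb returns 8 and getARM64BFwidth returns 16.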

// isARM64BFMask reports whether mask >> rshift, applied at lsb, is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}
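
// Illustrative only: for mask=0xff0 and rshift=4, the shifted mask is 0xff, a
// run of 8 trailing ones, so isARM64BFMask(0, 0xff0, 4) is true and
// arm64BFWidth(0xff0, 4) is 8.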

// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether typ is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}
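
// Illustrative only: on a 64-bit target (RegSize == 8) an int64 is
// registerizable, while on a 32-bit target (RegSize == 4) it is not; pointer
// and float types are always treated as registerizable here.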

// needRaceCleanup reports whether this call to racefuncenter/racefuncexit
// isn't needed, i.e. whether it can be removed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall, OpStaticLECall:
				// A scan for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*.
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any other call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// TODO REGISTER ABI this needs to be cleaned up.
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			if v.Op == OpStaticLECall {
				// There is no store yet.
				return true
			}
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		// See issue 29215.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}
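
// Illustrative only: because the readers zero-fill past the end of lsym.P, a
// symbol whose data is just []byte{0x01, 0x02} yields
// read32(sym, 0, binary.LittleEndian) == 0x00000201.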

// sequentialAddresses reports whether it can prove that x + n == y.
func sequentialAddresses(x, y *Value, n int64) bool {
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}
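
// Illustrative only: on amd64, x = (ADDQ a b) computes a+b and
// y = (LEAQ1 [n] a b) computes a+b+n, so when the two values share both
// arguments (in either order) and y carries no symbol, y is provably x+n.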

// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//   On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
//   On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
//    (because it does x + ^y + C).
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
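// The low four bits encode the flags: N is bit 0, Z is bit 1, C is bit 2 and
// V is bit 3, matching the accessors and flagConstantBuilder.encode below.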
type flagConstant uint8

// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	return fc&8 != 0
}

func (fc flagConstant) eq() bool {
	return fc.Z()
}
func (fc flagConstant) ne() bool {
	return !fc.Z()
}
func (fc flagConstant) lt() bool {
	return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
	return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
	return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
	return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
	return !fc.C()
}
func (fc flagConstant) ule() bool {
	return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
	return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
	return fc.C()
}

func (fc flagConstant) ltNoov() bool {
	return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
	return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
	return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
	return fc.ge() && !fc.V()
}

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}
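
// Illustrative only: flagConstantBuilder{Z: true, C: true}.encode() yields the
// value 6 (Z|C), for which eq(), ule() and uge() are true and lt() is false.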

// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
//  - the results of the C flag are different
//  - the results of the V flag when y==minint are different

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}
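
// Illustrative only: following the arm model, subFlags64(0, 0) sets C (no
// borrow occurred) while subFlags64(0, 1) clears it; on x86 the carry flag
// would be set in exactly the opposite cases.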

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}
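
// Illustrative only: logicFlags64(0) sets only Z, logicFlags64(-1) sets only N,
// and any positive input leaves all four flags clear.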