Source file src/cmd/compile/internal/ssa/regalloc.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Register allocation.
     6  //
     7  // We use a version of a linear scan register allocator. We treat the
     8  // whole function as a single long basic block and run through
     9  // it using a greedy register allocator. Then all merge edges
    10  // (those targeting a block with len(Preds)>1) are processed to
    11  // shuffle data into the place that the target of the edge expects.
    12  //
    13  // The greedy allocator moves values into registers just before they
    14  // are used, spills registers only when necessary, and spills the
    15  // value whose next use is farthest in the future.
    16  //
    17  // The register allocator requires that a block is not scheduled until
     18  // at least one of its predecessors has been scheduled. The most recent
    19  // such predecessor provides the starting register state for a block.
    20  //
    21  // It also requires that there are no critical edges (critical =
    22  // comes from a block with >1 successor and goes to a block with >1
    23  // predecessor).  This makes it easy to add fixup code on merge edges -
    24  // the source of a merge edge has only one successor, so we can add
    25  // fixup code to the end of that block.
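         //
         // For example, in
         //   b1: ... if c goto b3 else b2
         //   b2: ... goto b3
         //   b3: ...              (b3 has predecessors b1 and b2)
         // the edge b1->b3 is critical: b1 has two successors and b3 has two
         // predecessors, so there is no block on that edge in which to place
         // fixup code. The critical pass splits such edges beforehand by
         // inserting empty blocks.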
    26  
    27  // Spilling
    28  //
    29  // During the normal course of the allocator, we might throw a still-live
    30  // value out of all registers. When that value is subsequently used, we must
    31  // load it from a slot on the stack. We must also issue an instruction to
    32  // initialize that stack location with a copy of v.
    33  //
    34  // pre-regalloc:
    35  //   (1) v = Op ...
    36  //   (2) x = Op ...
    37  //   (3) ... = Op v ...
    38  //
    39  // post-regalloc:
    40  //   (1) v = Op ...    : AX // computes v, store result in AX
    41  //       s = StoreReg v     // spill v to a stack slot
    42  //   (2) x = Op ...    : AX // some other op uses AX
    43  //       c = LoadReg s : CX // restore v from stack slot
    44  //   (3) ... = Op c ...     // use the restored value
    45  //
    46  // Allocation occurs normally until we reach (3) and we realize we have
    47  // a use of v and it isn't in any register. At that point, we allocate
    48  // a spill (a StoreReg) for v. We can't determine the correct place for
    49  // the spill at this point, so we allocate the spill as blockless initially.
    50  // The restore is then generated to load v back into a register so it can
    51  // be used. Subsequent uses of v will use the restored value c instead.
    52  //
    53  // What remains is the question of where to schedule the spill.
    54  // During allocation, we keep track of the dominator of all restores of v.
    55  // The spill of v must dominate that block. The spill must also be issued at
    56  // a point where v is still in a register.
    57  //
    58  // To find the right place, start at b, the block which dominates all restores.
    59  //  - If b is v.Block, then issue the spill right after v.
    60  //    It is known to be in a register at that point, and dominates any restores.
    61  //  - Otherwise, if v is in a register at the start of b,
    62  //    put the spill of v at the start of b.
    63  //  - Otherwise, set b = immediate dominator of b, and repeat.
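         //
         // A rough sketch of that walk (illustrative helper names, not the
         // actual implementation):
         //     for {
         //         if b == v.Block {
         //             break // place the spill immediately after v
         //         }
         //         if inRegisterAtStart(v, b) {
         //             break // place the spill at the start of b
         //         }
         //         b = sdom.Parent(b) // immediate dominator of b
         //     }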
    64  //
    65  // Phi values are special, as always. We define two kinds of phis, those
    66  // where the merge happens in a register (a "register" phi) and those where
    67  // the merge happens in a stack location (a "stack" phi).
    68  //
    69  // A register phi must have the phi and all of its inputs allocated to the
    70  // same register. Register phis are spilled similarly to regular ops.
    71  //
    72  // A stack phi must have the phi and all of its inputs allocated to the same
    73  // stack location. Stack phis start out life already spilled - each phi
    74  // input must be a store (using StoreReg) at the end of the corresponding
    75  // predecessor block.
    76  //     b1: y = ... : AX        b2: z = ... : BX
    77  //         y2 = StoreReg y         z2 = StoreReg z
    78  //         goto b3                 goto b3
    79  //     b3: x = phi(y2, z2)
    80  // The stack allocator knows that StoreReg args of stack-allocated phis
    81  // must be allocated to the same stack slot as the phi that uses them.
    82  // x is now a spilled value and a restore must appear before its first use.
    83  
    84  // TODO
    85  
    86  // Use an affinity graph to mark two values which should use the
    87  // same register. This affinity graph will be used to prefer certain
    88  // registers for allocation. This affinity helps eliminate moves that
    89  // are required for phi implementations and helps generate allocations
    90  // for 2-register architectures.
    91  
    92  // Note: regalloc generates a not-quite-SSA output. If we have:
    93  //
    94  //             b1: x = ... : AX
    95  //                 x2 = StoreReg x
    96  //                 ... AX gets reused for something else ...
    97  //                 if ... goto b3 else b4
    98  //
    99  //   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
   100  //       ... use x3 ...                 ... use x4 ...
   101  //
   102  //             b2: ... use x3 ...
   103  //
   104  // If b3 is the primary predecessor of b2, then we use x3 in b2 and
    105  // add an x4:CX->BX copy at the end of b4.
   106  // But the definition of x3 doesn't dominate b2.  We should really
   107  // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
   108  // SSA form. For now, we ignore this problem as remaining in strict
   109  // SSA form isn't needed after regalloc. We'll just leave the use
   110  // of x3 not dominated by the definition of x3, and the CX->BX copy
   111  // will have no use (so don't run deadcode after regalloc!).
   112  // TODO: maybe we should introduce these extra phis?
   113  
   114  package ssa
   115  
   116  import (
   117  	"cmd/compile/internal/base"
   118  	"cmd/compile/internal/ir"
   119  	"cmd/compile/internal/types"
   120  	"cmd/internal/src"
   121  	"cmd/internal/sys"
   122  	"fmt"
   123  	"internal/buildcfg"
   124  	"math/bits"
   125  	"unsafe"
   126  )
   127  
   128  const (
   129  	moveSpills = iota
   130  	logSpills
   131  	regDebug
   132  	stackDebug
   133  )
   134  
   135  // distance is a measure of how far into the future values are used.
   136  // distance is measured in units of instructions.
   137  const (
   138  	likelyDistance   = 1
   139  	normalDistance   = 10
   140  	unlikelyDistance = 100
   141  )
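         // For example, computeLive (later in this file) charges likelyDistance
         // for a use reached across a likely branch edge and unlikelyDistance
         // across an unlikely one, so values used only on cold paths appear far
         // away and become preferred spill candidates.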
   142  
   143  // regalloc performs register allocation on f. It sets f.RegAlloc
   144  // to the resulting allocation.
   145  func regalloc(f *Func) {
   146  	var s regAllocState
   147  	s.init(f)
   148  	s.regalloc(f)
   149  }
   150  
   151  type register uint8
   152  
   153  const noRegister register = 255
   154  
   155  // For bulk initializing
   156  var noRegisters [32]register = [32]register{
   157  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   158  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   159  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   160  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   161  }
   162  
   163  // A regMask encodes a set of machine registers.
   164  // TODO: regMask -> regSet?
   165  type regMask uint64
   166  
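         // String returns the registers in m as a space-separated list of
         // "r<N>" names; for example, regMask(0b101).String() == "r0 r2".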
   167  func (m regMask) String() string {
   168  	s := ""
   169  	for r := register(0); m != 0; r++ {
   170  		if m>>r&1 == 0 {
   171  			continue
   172  		}
   173  		m &^= regMask(1) << r
   174  		if s != "" {
   175  			s += " "
   176  		}
   177  		s += fmt.Sprintf("r%d", r)
   178  	}
   179  	return s
   180  }
   181  
   182  func (s *regAllocState) RegMaskString(m regMask) string {
   183  	str := ""
   184  	for r := register(0); m != 0; r++ {
   185  		if m>>r&1 == 0 {
   186  			continue
   187  		}
   188  		m &^= regMask(1) << r
   189  		if str != "" {
   190  			str += " "
   191  		}
   192  		str += s.registers[r].String()
   193  	}
   194  	return str
   195  }
   196  
   197  // countRegs returns the number of set bits in the register mask.
   198  func countRegs(r regMask) int {
   199  	return bits.OnesCount64(uint64(r))
   200  }
   201  
   202  // pickReg picks an arbitrary register from the register mask.
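         // It happens to return the lowest set bit, so for example
         // pickReg(0b1010) == register(1), but callers should only rely on
         // getting some register in the mask.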
   203  func pickReg(r regMask) register {
   204  	if r == 0 {
   205  		panic("can't pick a register from an empty set")
   206  	}
   207  	// pick the lowest one
   208  	return register(bits.TrailingZeros64(uint64(r)))
   209  }
   210  
   211  type use struct {
   212  	dist int32    // distance from start of the block to a use of a value
   213  	pos  src.XPos // source position of the use
   214  	next *use     // linked list of uses of a value in nondecreasing dist order
   215  }
   216  
   217  // A valState records the register allocation state for a (pre-regalloc) value.
   218  type valState struct {
   219  	regs              regMask // the set of registers holding a Value (usually just one)
   220  	uses              *use    // list of uses in this block
   221  	spill             *Value  // spilled copy of the Value (if any)
   222  	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
   223  	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
    224  	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
   225  	rematerializeable bool    // cached value of v.rematerializeable()
   226  }
   227  
   228  type regState struct {
   229  	v *Value // Original (preregalloc) Value stored in this register.
   230  	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
   231  	// If a register is unused, v==c==nil
   232  }
   233  
   234  type regAllocState struct {
   235  	f *Func
   236  
   237  	sdom        SparseTree
   238  	registers   []Register
   239  	numRegs     register
   240  	SPReg       register
   241  	SBReg       register
   242  	GReg        register
   243  	allocatable regMask
   244  
   245  	// live values at the end of each block.  live[b.ID] is a list of value IDs
    246  	// which are live at the end of b, together with a count of how many
    247  	// instructions remain until the next use.
   248  	live [][]liveInfo
   249  	// desired register assignments at the end of each block.
   250  	// Note that this is a static map computed before allocation occurs. Dynamic
   251  	// register desires (from partially completed allocations) will trump
   252  	// this information.
   253  	desired []desiredState
   254  
   255  	// current state of each (preregalloc) Value
   256  	values []valState
   257  
   258  	// ID of SP, SB values
   259  	sp, sb ID
   260  
   261  	// For each Value, map from its value ID back to the
   262  	// preregalloc Value it was derived from.
   263  	orig []*Value
   264  
   265  	// current state of each register
   266  	regs []regState
   267  
   268  	// registers that contain values which can't be kicked out
   269  	nospill regMask
   270  
   271  	// mask of registers currently in use
   272  	used regMask
   273  
   274  	// mask of registers used in the current instruction
   275  	tmpused regMask
   276  
   277  	// current block we're working on
   278  	curBlock *Block
   279  
   280  	// cache of use records
   281  	freeUseRecords *use
   282  
   283  	// endRegs[blockid] is the register state at the end of each block.
   284  	// encoded as a set of endReg records.
   285  	endRegs [][]endReg
   286  
   287  	// startRegs[blockid] is the register state at the start of merge blocks.
   288  	// saved state does not include the state of phi ops in the block.
   289  	startRegs [][]startReg
   290  
   291  	// spillLive[blockid] is the set of live spills at the end of each block
   292  	spillLive [][]ID
   293  
   294  	// a set of copies we generated to move things around, and
    295  	// whether each is used in shuffle. Unused copies will be deleted.
   296  	copies map[*Value]bool
   297  
   298  	loopnest *loopnest
   299  
   300  	// choose a good order in which to visit blocks for allocation purposes.
   301  	visitOrder []*Block
   302  
   303  	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
   304  	blockOrder []int32
   305  
   306  	// whether to insert instructions that clobber dead registers at call sites
   307  	doClobber bool
   308  }
   309  
   310  type endReg struct {
   311  	r register
   312  	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
   313  	c *Value // cached version of the value
   314  }
   315  
   316  type startReg struct {
   317  	r   register
   318  	v   *Value   // pre-regalloc value needed in this register
   319  	c   *Value   // cached version of the value
   320  	pos src.XPos // source position of use of this register
   321  }
   322  
   323  // freeReg frees up register r. Any current user of r is kicked out.
   324  func (s *regAllocState) freeReg(r register) {
   325  	v := s.regs[r].v
   326  	if v == nil {
   327  		s.f.Fatalf("tried to free an already free register %d\n", r)
   328  	}
   329  
   330  	// Mark r as unused.
   331  	if s.f.pass.debug > regDebug {
   332  		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
   333  	}
   334  	s.regs[r] = regState{}
   335  	s.values[v.ID].regs &^= regMask(1) << r
   336  	s.used &^= regMask(1) << r
   337  }
   338  
   339  // freeRegs frees up all registers listed in m.
   340  func (s *regAllocState) freeRegs(m regMask) {
   341  	for m&s.used != 0 {
   342  		s.freeReg(pickReg(m & s.used))
   343  	}
   344  }
   345  
   346  // clobberRegs inserts instructions that clobber registers listed in m.
   347  func (s *regAllocState) clobberRegs(m regMask) {
    348  	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers; only clobber those
   349  	for m != 0 {
   350  		r := pickReg(m)
   351  		m &^= 1 << r
   352  		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
   353  		s.f.setHome(x, &s.registers[r])
   354  	}
   355  }
   356  
   357  // setOrig records that c's original value is the same as
   358  // v's original value.
   359  func (s *regAllocState) setOrig(c *Value, v *Value) {
   360  	for int(c.ID) >= len(s.orig) {
   361  		s.orig = append(s.orig, nil)
   362  	}
   363  	if s.orig[c.ID] != nil {
   364  		s.f.Fatalf("orig value set twice %s %s", c, v)
   365  	}
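         	// Note: we copy s.orig[v.ID] rather than storing v itself, so a
         	// copy of a copy still maps back to the original pre-regalloc value.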
   366  	s.orig[c.ID] = s.orig[v.ID]
   367  }
   368  
   369  // assignReg assigns register r to hold c, a copy of v.
   370  // r must be unused.
   371  func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
   372  	if s.f.pass.debug > regDebug {
   373  		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
   374  	}
   375  	if s.regs[r].v != nil {
   376  		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
   377  	}
   378  
   379  	// Update state.
   380  	s.regs[r] = regState{v, c}
   381  	s.values[v.ID].regs |= regMask(1) << r
   382  	s.used |= regMask(1) << r
   383  	s.f.setHome(c, &s.registers[r])
   384  }
   385  
   386  // allocReg chooses a register from the set of registers in mask.
   387  // If there is no unused register, a Value will be kicked out of
   388  // a register to make room.
   389  func (s *regAllocState) allocReg(mask regMask, v *Value) register {
   390  	if v.OnWasmStack {
   391  		return noRegister
   392  	}
   393  
   394  	mask &= s.allocatable
   395  	mask &^= s.nospill
   396  	if mask == 0 {
   397  		s.f.Fatalf("no register available for %s", v.LongString())
   398  	}
   399  
   400  	// Pick an unused register if one is available.
   401  	if mask&^s.used != 0 {
   402  		return pickReg(mask &^ s.used)
   403  	}
   404  
   405  	// Pick a value to spill. Spill the value with the
   406  	// farthest-in-the-future use.
   407  	// TODO: Prefer registers with already spilled Values?
   408  	// TODO: Modify preference using affinity graph.
   409  	// TODO: if a single value is in multiple registers, spill one of them
   410  	// before spilling a value in just a single register.
   411  
   412  	// Find a register to spill. We spill the register containing the value
   413  	// whose next use is as far in the future as possible.
   414  	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
   415  	var r register
   416  	maxuse := int32(-1)
   417  	for t := register(0); t < s.numRegs; t++ {
   418  		if mask>>t&1 == 0 {
   419  			continue
   420  		}
   421  		v := s.regs[t].v
   422  		if n := s.values[v.ID].uses.dist; n > maxuse {
   423  			// v's next use is farther in the future than any value
   424  			// we've seen so far. A new best spill candidate.
   425  			r = t
   426  			maxuse = n
   427  		}
   428  	}
   429  	if maxuse == -1 {
   430  		s.f.Fatalf("couldn't find register to spill")
   431  	}
   432  
   433  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   434  		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
   435  		// So if there is still a free register, the allocation should have picked that one in the first place instead of
   436  		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
   437  		s.freeReg(r)
   438  		return r
   439  	}
   440  
   441  	// Try to move it around before kicking out, if there is a free register.
   442  	// We generate a Copy and record it. It will be deleted if never used.
   443  	v2 := s.regs[r].v
   444  	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
   445  	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
   446  		r2 := pickReg(m)
   447  		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
   448  		s.copies[c] = false
   449  		if s.f.pass.debug > regDebug {
   450  			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
   451  		}
   452  		s.setOrig(c, v2)
   453  		s.assignReg(r2, v2, c)
   454  	}
   455  	s.freeReg(r)
   456  	return r
   457  }
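
         // For example, if AX holds a value next used 3 instructions away and
         // CX holds one next used 40 away, allocReg evicts CX: the farthest
         // next use, per the replacement policy linked above.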
   458  
   459  // makeSpill returns a Value which represents the spilled value of v.
   460  // b is the block in which the spill is used.
   461  func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
   462  	vi := &s.values[v.ID]
   463  	if vi.spill != nil {
   464  		// Final block not known - keep track of subtree where restores reside.
   465  		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
   466  		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
   467  		return vi.spill
   468  	}
   469  	// Make a spill for v. We don't know where we want
   470  	// to put it yet, so we leave it blockless for now.
   471  	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
   472  	// We also don't know what the spill's arg will be.
   473  	// Leave it argless for now.
   474  	s.setOrig(spill, v)
   475  	vi.spill = spill
   476  	vi.restoreMin = s.sdom[b.ID].entry
   477  	vi.restoreMax = s.sdom[b.ID].exit
   478  	return spill
   479  }
   480  
   481  // allocValToReg allocates v to a register selected from regMask and
   482  // returns the register copy of v. Any previous user is kicked out and spilled
   483  // (if necessary). Load code is added at the current pc. If nospill is set the
   484  // allocated register is marked nospill so the assignment cannot be
   485  // undone until the caller allows it by clearing nospill. Returns a
   486  // *Value which is either v or a copy of v allocated to the chosen register.
   487  func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
   488  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
   489  		c := v.copyIntoWithXPos(s.curBlock, pos)
   490  		c.OnWasmStack = true
   491  		s.setOrig(c, v)
   492  		return c
   493  	}
   494  	if v.OnWasmStack {
   495  		return v
   496  	}
   497  
   498  	vi := &s.values[v.ID]
   499  	pos = pos.WithNotStmt()
   500  	// Check if v is already in a requested register.
   501  	if mask&vi.regs != 0 {
   502  		r := pickReg(mask & vi.regs)
   503  		if s.regs[r].v != v || s.regs[r].c == nil {
   504  			panic("bad register state")
   505  		}
   506  		if nospill {
   507  			s.nospill |= regMask(1) << r
   508  		}
   509  		return s.regs[r].c
   510  	}
   511  
   512  	var r register
   513  	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
   514  	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
   515  	if !onWasmStack {
   516  		// Allocate a register.
   517  		r = s.allocReg(mask, v)
   518  	}
   519  
   520  	// Allocate v to the new register.
   521  	var c *Value
   522  	if vi.regs != 0 {
   523  		// Copy from a register that v is already in.
   524  		r2 := pickReg(vi.regs)
   525  		if s.regs[r2].v != v {
   526  			panic("bad register state")
   527  		}
   528  		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
   529  	} else if v.rematerializeable() {
   530  		// Rematerialize instead of loading from the spill location.
   531  		c = v.copyIntoWithXPos(s.curBlock, pos)
   532  	} else {
   533  		// Load v from its spill location.
   534  		spill := s.makeSpill(v, s.curBlock)
   535  		if s.f.pass.debug > logSpills {
   536  			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
   537  		}
   538  		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
   539  	}
   540  
   541  	s.setOrig(c, v)
   542  
   543  	if onWasmStack {
   544  		c.OnWasmStack = true
   545  		return c
   546  	}
   547  
   548  	s.assignReg(r, v, c)
   549  	if c.Op == OpLoadReg && s.isGReg(r) {
   550  		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
   551  	}
   552  	if nospill {
   553  		s.nospill |= regMask(1) << r
   554  	}
   555  	return c
   556  }
   557  
    558  // isLeaf reports whether f contains no calls (tail calls excluded).
   559  func isLeaf(f *Func) bool {
   560  	for _, b := range f.Blocks {
   561  		for _, v := range b.Values {
   562  			if v.Op.IsCall() && !v.Op.IsTailCall() {
   563  				// tail call is not counted as it does not save the return PC or need a frame
   564  				return false
   565  			}
   566  		}
   567  	}
   568  	return true
   569  }
   570  
   571  func (s *regAllocState) init(f *Func) {
   572  	s.f = f
   573  	s.f.RegAlloc = s.f.Cache.locs[:0]
   574  	s.registers = f.Config.registers
   575  	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
   576  		s.f.Fatalf("bad number of registers: %d", nr)
   577  	} else {
   578  		s.numRegs = register(nr)
   579  	}
   580  	// Locate SP, SB, and g registers.
   581  	s.SPReg = noRegister
   582  	s.SBReg = noRegister
   583  	s.GReg = noRegister
   584  	for r := register(0); r < s.numRegs; r++ {
   585  		switch s.registers[r].String() {
   586  		case "SP":
   587  			s.SPReg = r
   588  		case "SB":
   589  			s.SBReg = r
   590  		case "g":
   591  			s.GReg = r
   592  		}
   593  	}
   594  	// Make sure we found all required registers.
   595  	switch noRegister {
   596  	case s.SPReg:
   597  		s.f.Fatalf("no SP register found")
   598  	case s.SBReg:
   599  		s.f.Fatalf("no SB register found")
   600  	case s.GReg:
   601  		if f.Config.hasGReg {
   602  			s.f.Fatalf("no g register found")
   603  		}
   604  	}
   605  
   606  	// Figure out which registers we're allowed to use.
   607  	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
   608  	s.allocatable &^= 1 << s.SPReg
   609  	s.allocatable &^= 1 << s.SBReg
   610  	if s.f.Config.hasGReg {
   611  		s.allocatable &^= 1 << s.GReg
   612  	}
   613  	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
   614  		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
   615  	}
   616  	if s.f.Config.LinkReg != -1 {
   617  		if isLeaf(f) {
   618  			// Leaf functions don't save/restore the link register.
   619  			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
   620  		}
   621  	}
   622  	if s.f.Config.ctxt.Flag_dynlink {
   623  		switch s.f.Config.arch {
   624  		case "386":
   625  			// nothing to do.
   626  			// Note that for Flag_shared (position independent code)
   627  			// we do need to be careful, but that carefulness is hidden
   628  			// in the rewrite rules so we always have a free register
    629  			// available for global loads/stores. See gen/386.rules (search for Flag_shared).
   630  		case "amd64":
   631  			s.allocatable &^= 1 << 15 // R15
   632  		case "arm":
   633  			s.allocatable &^= 1 << 9 // R9
   634  		case "arm64":
   635  			// nothing to do
   636  		case "ppc64le": // R2 already reserved.
   637  			// nothing to do
   638  		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
   639  			// nothing to do
   640  		case "s390x":
   641  			s.allocatable &^= 1 << 11 // R11
   642  		default:
   643  			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
   644  		}
   645  	}
   646  
   647  	// Linear scan register allocation can be influenced by the order in which blocks appear.
   648  	// Decouple the register allocation order from the generated block order.
   649  	// This also creates an opportunity for experiments to find a better order.
   650  	s.visitOrder = layoutRegallocOrder(f)
   651  
   652  	// Compute block order. This array allows us to distinguish forward edges
   653  	// from backward edges and compute how far they go.
   654  	s.blockOrder = make([]int32, f.NumBlocks())
   655  	for i, b := range s.visitOrder {
   656  		s.blockOrder[b.ID] = int32(i)
   657  	}
   658  
   659  	s.regs = make([]regState, s.numRegs)
   660  	nv := f.NumValues()
   661  	if cap(s.f.Cache.regallocValues) >= nv {
   662  		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
   663  	} else {
   664  		s.f.Cache.regallocValues = make([]valState, nv)
   665  	}
   666  	s.values = s.f.Cache.regallocValues
   667  	s.orig = make([]*Value, nv)
   668  	s.copies = make(map[*Value]bool)
   669  	for _, b := range s.visitOrder {
   670  		for _, v := range b.Values {
   671  			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
   672  				s.values[v.ID].needReg = true
   673  				s.values[v.ID].rematerializeable = v.rematerializeable()
   674  				s.orig[v.ID] = v
   675  			}
   676  			// Note: needReg is false for values returning Tuple types.
   677  			// Instead, we mark the corresponding Selects as needReg.
   678  		}
   679  	}
   680  	s.computeLive()
   681  
   682  	s.endRegs = make([][]endReg, f.NumBlocks())
   683  	s.startRegs = make([][]startReg, f.NumBlocks())
   684  	s.spillLive = make([][]ID, f.NumBlocks())
   685  	s.sdom = f.Sdom()
   686  
   687  	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
   688  	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   689  		canLiveOnStack := f.newSparseSet(f.NumValues())
   690  		defer f.retSparseSet(canLiveOnStack)
   691  		for _, b := range f.Blocks {
   692  			// New block. Clear candidate set.
   693  			canLiveOnStack.clear()
   694  			for _, c := range b.ControlValues() {
   695  				if c.Uses == 1 && !opcodeTable[c.Op].generic {
   696  					canLiveOnStack.add(c.ID)
   697  				}
   698  			}
   699  			// Walking backwards.
   700  			for i := len(b.Values) - 1; i >= 0; i-- {
   701  				v := b.Values[i]
   702  				if canLiveOnStack.contains(v.ID) {
   703  					v.OnWasmStack = true
   704  				} else {
    705  				// Value cannot live on the stack. Values are not allowed to be reordered, so clear the candidate set.
   706  					canLiveOnStack.clear()
   707  				}
   708  				for _, arg := range v.Args {
   709  					// Value can live on the stack if:
   710  					// - it is only used once
   711  					// - it is used in the same basic block
   712  					// - it is not a "mem" value
   713  					// - it is a WebAssembly op
   714  					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
   715  						canLiveOnStack.add(arg.ID)
   716  					}
   717  				}
   718  			}
   719  		}
   720  	}
   721  
   722  	// The clobberdeadreg experiment inserts code to clobber dead registers
   723  	// at call sites.
   724  	// Ignore huge functions to avoid doing too much work.
   725  	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
   726  		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
   727  		s.doClobber = true
   728  	}
   729  }
   730  
    731  // addUse adds a use record for id at distance dist from the start of the block.
   732  // All calls to addUse must happen with nonincreasing dist.
   733  func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
   734  	r := s.freeUseRecords
   735  	if r != nil {
   736  		s.freeUseRecords = r.next
   737  	} else {
   738  		r = &use{}
   739  	}
   740  	r.dist = dist
   741  	r.pos = pos
   742  	r.next = s.values[id].uses
   743  	s.values[id].uses = r
   744  	if r.next != nil && dist > r.next.dist {
   745  		s.f.Fatalf("uses added in wrong order")
   746  	}
   747  }
   748  
   749  // advanceUses advances the uses of v's args from the state before v to the state after v.
   750  // Any values which have no more uses are deallocated from registers.
   751  func (s *regAllocState) advanceUses(v *Value) {
   752  	for _, a := range v.Args {
   753  		if !s.values[a.ID].needReg {
   754  			continue
   755  		}
   756  		ai := &s.values[a.ID]
   757  		r := ai.uses
   758  		ai.uses = r.next
   759  		if r.next == nil {
   760  			// Value is dead, free all registers that hold it.
   761  			s.freeRegs(ai.regs)
   762  		}
   763  		r.next = s.freeUseRecords
   764  		s.freeUseRecords = r
   765  	}
   766  }
   767  
   768  // liveAfterCurrentInstruction reports whether v is live after
   769  // the current instruction is completed.  v must be used by the
   770  // current instruction.
   771  func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
   772  	u := s.values[v.ID].uses
   773  	if u == nil {
   774  		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
   775  	}
   776  	d := u.dist
   777  	for u != nil && u.dist == d {
   778  		u = u.next
   779  	}
   780  	return u != nil && u.dist > d
   781  }
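
         // For example, if v's remaining use distances are [4 4 7], the two
         // dist-4 records are uses by the current instruction and the dist-7
         // record keeps v live afterwards; with only [4 4], v dies here.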
   782  
    783  // setState sets the state of the registers to that encoded in regs.
   784  func (s *regAllocState) setState(regs []endReg) {
   785  	s.freeRegs(s.used)
   786  	for _, x := range regs {
   787  		s.assignReg(x.r, x.v, x.c)
   788  	}
   789  }
   790  
   791  // compatRegs returns the set of registers which can store a type t.
   792  func (s *regAllocState) compatRegs(t *types.Type) regMask {
   793  	var m regMask
   794  	if t.IsTuple() || t.IsFlags() {
   795  		return 0
   796  	}
   797  	if t.IsFloat() || t == types.TypeInt128 {
   798  		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
   799  			m = s.f.Config.fp32RegMask
   800  		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
   801  			m = s.f.Config.fp64RegMask
   802  		} else {
   803  			m = s.f.Config.fpRegMask
   804  		}
   805  	} else {
   806  		m = s.f.Config.gpRegMask
   807  	}
   808  	return m & s.allocatable
   809  }
   810  
   811  // regspec returns the regInfo for operation op.
   812  func (s *regAllocState) regspec(v *Value) regInfo {
   813  	op := v.Op
   814  	if op == OpConvert {
   815  		// OpConvert is a generic op, so it doesn't have a
   816  		// register set in the static table. It can use any
   817  		// allocatable integer register.
   818  		m := s.allocatable & s.f.Config.gpRegMask
   819  		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
   820  	}
   821  	if op == OpArgIntReg {
   822  		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
   823  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   824  	}
   825  	if op == OpArgFloatReg {
   826  		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
   827  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   828  	}
   829  	if op.IsCall() {
   830  		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
   831  			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
   832  		}
   833  	}
   834  	if op == OpMakeResult && s.f.OwnAux.reg != nil {
   835  		return *s.f.OwnAux.ResultReg(s.f.Config)
   836  	}
   837  	return opcodeTable[op].reg
   838  }
   839  
   840  func (s *regAllocState) isGReg(r register) bool {
   841  	return s.f.Config.hasGReg && s.GReg == r
   842  }
   843  
   844  func (s *regAllocState) regalloc(f *Func) {
   845  	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
   846  	defer f.retSparseSet(regValLiveSet)
   847  	var oldSched []*Value
   848  	var phis []*Value
   849  	var phiRegs []register
   850  	var args []*Value
   851  
   852  	// Data structure used for computing desired registers.
   853  	var desired desiredState
   854  
   855  	// Desired registers for inputs & outputs for each instruction in the block.
   856  	type dentry struct {
   857  		out [4]register    // desired output registers
    858  		in  [3][4]register // desired input registers (for inputs 0, 1, and 2)
   859  	}
   860  	var dinfo []dentry
   861  
   862  	if f.Entry != f.Blocks[0] {
   863  		f.Fatalf("entry block must be first")
   864  	}
   865  
   866  	for _, b := range s.visitOrder {
   867  		if s.f.pass.debug > regDebug {
   868  			fmt.Printf("Begin processing block %v\n", b)
   869  		}
   870  		s.curBlock = b
   871  
   872  		// Initialize regValLiveSet and uses fields for this block.
   873  		// Walk backwards through the block doing liveness analysis.
   874  		regValLiveSet.clear()
   875  		for _, e := range s.live[b.ID] {
   876  			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
   877  			regValLiveSet.add(e.ID)
   878  		}
   879  		for _, v := range b.ControlValues() {
   880  			if s.values[v.ID].needReg {
   881  				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
   882  				regValLiveSet.add(v.ID)
   883  			}
   884  		}
   885  		for i := len(b.Values) - 1; i >= 0; i-- {
   886  			v := b.Values[i]
   887  			regValLiveSet.remove(v.ID)
   888  			if v.Op == OpPhi {
   889  				// Remove v from the live set, but don't add
   890  				// any inputs. This is the state the len(b.Preds)>1
   891  				// case below desires; it wants to process phis specially.
   892  				continue
   893  			}
   894  			if opcodeTable[v.Op].call {
   895  				// Function call clobbers all the registers but SP and SB.
   896  				regValLiveSet.clear()
   897  				if s.sp != 0 && s.values[s.sp].uses != nil {
   898  					regValLiveSet.add(s.sp)
   899  				}
   900  				if s.sb != 0 && s.values[s.sb].uses != nil {
   901  					regValLiveSet.add(s.sb)
   902  				}
   903  			}
   904  			for _, a := range v.Args {
   905  				if !s.values[a.ID].needReg {
   906  					continue
   907  				}
   908  				s.addUse(a.ID, int32(i), v.Pos)
   909  				regValLiveSet.add(a.ID)
   910  			}
   911  		}
   912  		if s.f.pass.debug > regDebug {
   913  			fmt.Printf("use distances for %s\n", b)
   914  			for i := range s.values {
   915  				vi := &s.values[i]
   916  				u := vi.uses
   917  				if u == nil {
   918  					continue
   919  				}
   920  				fmt.Printf("  v%d:", i)
   921  				for u != nil {
   922  					fmt.Printf(" %d", u.dist)
   923  					u = u.next
   924  				}
   925  				fmt.Println()
   926  			}
   927  		}
   928  
   929  		// Make a copy of the block schedule so we can generate a new one in place.
    930  		// We make separate copies for phis and for regular values.
   931  		nphi := 0
   932  		for _, v := range b.Values {
   933  			if v.Op != OpPhi {
   934  				break
   935  			}
   936  			nphi++
   937  		}
   938  		phis = append(phis[:0], b.Values[:nphi]...)
   939  		oldSched = append(oldSched[:0], b.Values[nphi:]...)
   940  		b.Values = b.Values[:0]
   941  
   942  		// Initialize start state of block.
   943  		if b == f.Entry {
   944  			// Regalloc state is empty to start.
   945  			if nphi > 0 {
   946  				f.Fatalf("phis in entry block")
   947  			}
   948  		} else if len(b.Preds) == 1 {
   949  			// Start regalloc state with the end state of the previous block.
   950  			s.setState(s.endRegs[b.Preds[0].b.ID])
   951  			if nphi > 0 {
   952  				f.Fatalf("phis in single-predecessor block")
   953  			}
   954  			// Drop any values which are no longer live.
   955  			// This may happen because at the end of p, a value may be
   956  			// live but only used by some other successor of p.
   957  			for r := register(0); r < s.numRegs; r++ {
   958  				v := s.regs[r].v
   959  				if v != nil && !regValLiveSet.contains(v.ID) {
   960  					s.freeReg(r)
   961  				}
   962  			}
   963  		} else {
   964  			// This is the complicated case. We have more than one predecessor,
   965  			// which means we may have Phi ops.
   966  
    967  			// Start with the final register state of the predecessor with the fewest live spills.
    968  			// This is based on the following points:
    969  			// 1. Fewer live spills indicate that the register pressure on this path is lower,
    970  			//    so the values of this block are more likely to be allocated to registers.
    971  			// 2. Avoid predecessors that contain function calls, because a predecessor that
    972  			//    contains a function call usually generates many spills and loses the previous
    973  			//    allocation state.
    974  			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
    975  			// an impact on the code size and compiler speed. But it is not easy to find a simple
    976  			// and efficient method that combines multiple factors.
   977  			idx := -1
   978  			for i, p := range b.Preds {
   979  				// If the predecessor has not been visited yet, skip it because its end state
    980  			// (endRegs and spillLive) has not been computed yet.
   981  				pb := p.b
   982  				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
   983  					continue
   984  				}
   985  				if idx == -1 {
   986  					idx = i
   987  					continue
   988  				}
   989  				pSel := b.Preds[idx].b
   990  				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
   991  					idx = i
   992  				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
    993  					// Use a bit of likely information. After the critical pass, pb and pSel must
    994  					// be plain blocks, so check the edges from pb's predecessors (pb.Preds) instead of the edge pb->b.
   995  					// TODO: improve the prediction of the likely predecessor. The following
   996  					// method is only suitable for the simplest cases. For complex cases,
   997  					// the prediction may be inaccurate, but this does not affect the
   998  					// correctness of the program.
   999  					// According to the layout algorithm, the predecessor with the
  1000  					// smaller blockOrder is the true branch, and the test results show
   1001  					// that it is better to choose the predecessor with the smaller
   1002  					// blockOrder than to make no choice at all.
  1003  					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
  1004  						idx = i
  1005  					}
  1006  				}
  1007  			}
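         			// For example, if visited predecessors p1 and p2 end with two
         			// and one live spills respectively, p2 is chosen and its endRegs
         			// seed this block's starting state.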
  1008  			if idx < 0 {
  1009  				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
  1010  			}
  1011  			p := b.Preds[idx].b
  1012  			s.setState(s.endRegs[p.ID])
  1013  
  1014  			if s.f.pass.debug > regDebug {
  1015  				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
  1016  				for _, x := range s.endRegs[p.ID] {
  1017  					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
  1018  				}
  1019  			}
  1020  
  1021  			// Decide on registers for phi ops. Use the registers determined
  1022  			// by the primary predecessor if we can.
  1023  			// TODO: pick best of (already processed) predecessors?
  1024  			// Majority vote? Deepest nesting level?
  1025  			phiRegs = phiRegs[:0]
  1026  			var phiUsed regMask
  1027  
  1028  			for _, v := range phis {
  1029  				if !s.values[v.ID].needReg {
  1030  					phiRegs = append(phiRegs, noRegister)
  1031  					continue
  1032  				}
  1033  				a := v.Args[idx]
   1034  				// Some instructions target non-allocatable registers.
  1035  				// They're not suitable for further (phi-function) allocation.
  1036  				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
  1037  				if m != 0 {
  1038  					r := pickReg(m)
  1039  					phiUsed |= regMask(1) << r
  1040  					phiRegs = append(phiRegs, r)
  1041  				} else {
  1042  					phiRegs = append(phiRegs, noRegister)
  1043  				}
  1044  			}
  1045  
  1046  			// Second pass - deallocate all in-register phi inputs.
  1047  			for i, v := range phis {
  1048  				if !s.values[v.ID].needReg {
  1049  					continue
  1050  				}
  1051  				a := v.Args[idx]
  1052  				r := phiRegs[i]
  1053  				if r == noRegister {
  1054  					continue
  1055  				}
  1056  				if regValLiveSet.contains(a.ID) {
  1057  					// Input value is still live (it is used by something other than Phi).
  1058  					// Try to move it around before kicking out, if there is a free register.
  1059  					// We generate a Copy in the predecessor block and record it. It will be
  1060  					// deleted later if never used.
  1061  					//
  1062  					// Pick a free register. At this point some registers used in the predecessor
  1063  					// block may have been deallocated. Those are the ones used for Phis. Exclude
  1064  					// them (and they are not going to be helpful anyway).
  1065  					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
  1066  					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
  1067  						r2 := pickReg(m)
  1068  						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
  1069  						s.copies[c] = false
  1070  						if s.f.pass.debug > regDebug {
  1071  							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
  1072  						}
  1073  						s.setOrig(c, a)
  1074  						s.assignReg(r2, a, c)
  1075  						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
  1076  					}
  1077  				}
  1078  				s.freeReg(r)
  1079  			}
  1080  
  1081  			// Copy phi ops into new schedule.
  1082  			b.Values = append(b.Values, phis...)
  1083  
  1084  			// Third pass - pick registers for phis whose input
  1085  			// was not in a register in the primary predecessor.
  1086  			for i, v := range phis {
  1087  				if !s.values[v.ID].needReg {
  1088  					continue
  1089  				}
  1090  				if phiRegs[i] != noRegister {
  1091  					continue
  1092  				}
  1093  				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
   1094  				// If one of the other inputs of v is in a register, and that register is available,
   1095  				// select it; this can avoid some unnecessary copies.
  1096  				for i, pe := range b.Preds {
  1097  					if i == idx {
  1098  						continue
  1099  					}
  1100  					ri := noRegister
  1101  					for _, er := range s.endRegs[pe.b.ID] {
  1102  						if er.v == s.orig[v.Args[i].ID] {
  1103  							ri = er.r
  1104  							break
  1105  						}
  1106  					}
  1107  					if ri != noRegister && m>>ri&1 != 0 {
  1108  						m = regMask(1) << ri
  1109  						break
  1110  					}
  1111  				}
  1112  				if m != 0 {
  1113  					r := pickReg(m)
  1114  					phiRegs[i] = r
  1115  					phiUsed |= regMask(1) << r
  1116  				}
  1117  			}
  1118  
  1119  			// Set registers for phis. Add phi spill code.
  1120  			for i, v := range phis {
  1121  				if !s.values[v.ID].needReg {
  1122  					continue
  1123  				}
  1124  				r := phiRegs[i]
  1125  				if r == noRegister {
  1126  					// stack-based phi
  1127  					// Spills will be inserted in all the predecessors below.
  1128  					s.values[v.ID].spill = v // v starts life spilled
  1129  					continue
  1130  				}
  1131  				// register-based phi
  1132  				s.assignReg(r, v, v)
  1133  			}
  1134  
  1135  			// Deallocate any values which are no longer live. Phis are excluded.
  1136  			for r := register(0); r < s.numRegs; r++ {
  1137  				if phiUsed>>r&1 != 0 {
  1138  					continue
  1139  				}
  1140  				v := s.regs[r].v
  1141  				if v != nil && !regValLiveSet.contains(v.ID) {
  1142  					s.freeReg(r)
  1143  				}
  1144  			}
  1145  
  1146  			// Save the starting state for use by merge edges.
   1147  			// We append to a stack-allocated variable that we'll
  1148  			// later copy into s.startRegs in one fell swoop, to save
  1149  			// on allocations.
  1150  			regList := make([]startReg, 0, 32)
  1151  			for r := register(0); r < s.numRegs; r++ {
  1152  				v := s.regs[r].v
  1153  				if v == nil {
  1154  					continue
  1155  				}
  1156  				if phiUsed>>r&1 != 0 {
  1157  					// Skip registers that phis used, we'll handle those
  1158  					// specially during merge edge processing.
  1159  					continue
  1160  				}
  1161  				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
  1162  			}
  1163  			s.startRegs[b.ID] = make([]startReg, len(regList))
  1164  			copy(s.startRegs[b.ID], regList)
  1165  
  1166  			if s.f.pass.debug > regDebug {
  1167  				fmt.Printf("after phis\n")
  1168  				for _, x := range s.startRegs[b.ID] {
  1169  					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
  1170  				}
  1171  			}
  1172  		}
  1173  
  1174  		// Allocate space to record the desired registers for each value.
  1175  		if l := len(oldSched); cap(dinfo) < l {
  1176  			dinfo = make([]dentry, l)
  1177  		} else {
  1178  			dinfo = dinfo[:l]
  1179  			for i := range dinfo {
  1180  				dinfo[i] = dentry{}
  1181  			}
  1182  		}
  1183  
  1184  		// Load static desired register info at the end of the block.
  1185  		desired.copy(&s.desired[b.ID])
  1186  
  1187  		// Check actual assigned registers at the start of the next block(s).
  1188  		// Dynamically assigned registers will trump the static
  1189  		// desired registers computed during liveness analysis.
  1190  		// Note that we do this phase after startRegs is set above, so that
  1191  		// we get the right behavior for a block which branches to itself.
  1192  		for _, e := range b.Succs {
  1193  			succ := e.b
  1194  			// TODO: prioritize likely successor?
  1195  			for _, x := range s.startRegs[succ.ID] {
  1196  				desired.add(x.v.ID, x.r)
  1197  			}
  1198  			// Process phi ops in succ.
  1199  			pidx := e.i
  1200  			for _, v := range succ.Values {
  1201  				if v.Op != OpPhi {
  1202  					break
  1203  				}
  1204  				if !s.values[v.ID].needReg {
  1205  					continue
  1206  				}
  1207  				rp, ok := s.f.getHome(v.ID).(*Register)
  1208  				if !ok {
  1209  					// If v is not assigned a register, pick a register assigned to one of v's inputs.
  1210  					// Hopefully v will get assigned that register later.
  1211  					// If the inputs have allocated register information, add it to desired,
  1212  					// which may reduce spill or copy operations when the register is available.
  1213  					for _, a := range v.Args {
  1214  						rp, ok = s.f.getHome(a.ID).(*Register)
  1215  						if ok {
  1216  							break
  1217  						}
  1218  					}
  1219  					if !ok {
  1220  						continue
  1221  					}
  1222  				}
  1223  				desired.add(v.Args[pidx].ID, register(rp.num))
  1224  			}
  1225  		}
  1226  		// Walk values backwards computing desired register info.
  1227  		// See computeLive for more comments.
  1228  		for i := len(oldSched) - 1; i >= 0; i-- {
  1229  			v := oldSched[i]
  1230  			prefs := desired.remove(v.ID)
  1231  			regspec := s.regspec(v)
  1232  			desired.clobber(regspec.clobbers)
  1233  			for _, j := range regspec.inputs {
  1234  				if countRegs(j.regs) != 1 {
  1235  					continue
  1236  				}
  1237  				desired.clobber(j.regs)
  1238  				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  1239  			}
  1240  			if opcodeTable[v.Op].resultInArg0 {
  1241  				if opcodeTable[v.Op].commutative {
  1242  					desired.addList(v.Args[1].ID, prefs)
  1243  				}
  1244  				desired.addList(v.Args[0].ID, prefs)
  1245  			}
  1246  			// Save desired registers for this value.
  1247  			dinfo[i].out = prefs
  1248  			for j, a := range v.Args {
  1249  				if j >= len(dinfo[i].in) {
  1250  					break
  1251  				}
  1252  				dinfo[i].in[j] = desired.get(a.ID)
  1253  			}
  1254  		}
  1255  
  1256  		// Process all the non-phi values.
  1257  		for idx, v := range oldSched {
  1258  			if s.f.pass.debug > regDebug {
  1259  				fmt.Printf("  processing %s\n", v.LongString())
  1260  			}
  1261  			regspec := s.regspec(v)
  1262  			if v.Op == OpPhi {
  1263  				f.Fatalf("phi %s not at start of block", v)
  1264  			}
  1265  			if v.Op == OpSP {
  1266  				s.assignReg(s.SPReg, v, v)
  1267  				b.Values = append(b.Values, v)
  1268  				s.advanceUses(v)
  1269  				s.sp = v.ID
  1270  				continue
  1271  			}
  1272  			if v.Op == OpSB {
  1273  				s.assignReg(s.SBReg, v, v)
  1274  				b.Values = append(b.Values, v)
  1275  				s.advanceUses(v)
  1276  				s.sb = v.ID
  1277  				continue
  1278  			}
  1279  			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
  1280  				if s.values[v.ID].needReg {
  1281  					if v.Op == OpSelectN {
  1282  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
  1283  					} else {
  1284  						var i = 0
  1285  						if v.Op == OpSelect1 {
  1286  							i = 1
  1287  						}
  1288  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
  1289  					}
  1290  				}
  1291  				b.Values = append(b.Values, v)
  1292  				s.advanceUses(v)
  1293  				goto issueSpill
  1294  			}
  1295  			if v.Op == OpGetG && s.f.Config.hasGReg {
  1296  				// use hardware g register
  1297  				if s.regs[s.GReg].v != nil {
  1298  					s.freeReg(s.GReg) // kick out the old value
  1299  				}
  1300  				s.assignReg(s.GReg, v, v)
  1301  				b.Values = append(b.Values, v)
  1302  				s.advanceUses(v)
  1303  				goto issueSpill
  1304  			}
  1305  			if v.Op == OpArg {
  1306  				// Args are "pre-spilled" values. We don't allocate
   1307  				// any register here. We just set the value's spill pointer to
   1308  				// point at the value itself; any later user will issue a restore from it.
  1309  				s.values[v.ID].spill = v
  1310  				b.Values = append(b.Values, v)
  1311  				s.advanceUses(v)
  1312  				continue
  1313  			}
  1314  			if v.Op == OpKeepAlive {
  1315  				// Make sure the argument to v is still live here.
  1316  				s.advanceUses(v)
  1317  				a := v.Args[0]
  1318  				vi := &s.values[a.ID]
  1319  				if vi.regs == 0 && !vi.rematerializeable {
  1320  					// Use the spill location.
  1321  					// This forces later liveness analysis to make the
  1322  					// value live at this point.
  1323  					v.SetArg(0, s.makeSpill(a, b))
  1324  				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
  1325  					// Rematerializeable value with a gc.Node. This is the address of
  1326  					// a stack object (e.g. an LEAQ). Keep the object live.
  1327  					// Change it to VarLive, which is what plive expects for locals.
  1328  					v.Op = OpVarLive
  1329  					v.SetArgs1(v.Args[1])
  1330  					v.Aux = a.Aux
  1331  				} else {
  1332  					// In-register and rematerializeable values are already live.
  1333  					// These are typically rematerializeable constants like nil,
  1334  					// or values of a variable that were modified since the last call.
  1335  					v.Op = OpCopy
  1336  					v.SetArgs1(v.Args[1])
  1337  				}
  1338  				b.Values = append(b.Values, v)
  1339  				continue
  1340  			}
  1341  			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
  1342  				// No register allocation required (or none specified yet)
  1343  				if s.doClobber && v.Op.IsCall() {
  1344  					s.clobberRegs(regspec.clobbers)
  1345  				}
  1346  				s.freeRegs(regspec.clobbers)
  1347  				b.Values = append(b.Values, v)
  1348  				s.advanceUses(v)
  1349  				continue
  1350  			}
  1351  
  1352  			if s.values[v.ID].rematerializeable {
  1353  				// Value is rematerializeable, don't issue it here.
  1354  				// It will get issued just before each use (see
   1355  				// allocValToReg).
  1356  				for _, a := range v.Args {
  1357  					a.Uses--
  1358  				}
  1359  				s.advanceUses(v)
  1360  				continue
  1361  			}
  1362  
  1363  			if s.f.pass.debug > regDebug {
  1364  				fmt.Printf("value %s\n", v.LongString())
  1365  				fmt.Printf("  out:")
  1366  				for _, r := range dinfo[idx].out {
  1367  					if r != noRegister {
  1368  						fmt.Printf(" %s", &s.registers[r])
  1369  					}
  1370  				}
  1371  				fmt.Println()
  1372  				for i := 0; i < len(v.Args) && i < 3; i++ {
  1373  					fmt.Printf("  in%d:", i)
  1374  					for _, r := range dinfo[idx].in[i] {
  1375  						if r != noRegister {
  1376  							fmt.Printf(" %s", &s.registers[r])
  1377  						}
  1378  					}
  1379  					fmt.Println()
  1380  				}
  1381  			}
  1382  
  1383  			// Move arguments to registers.
  1384  			// First, if an arg must be in a specific register and it is already
  1385  			// in place, keep it.
  1386  			args = append(args[:0], make([]*Value, len(v.Args))...)
  1387  			for i, a := range v.Args {
  1388  				if !s.values[a.ID].needReg {
  1389  					args[i] = a
  1390  				}
  1391  			}
  1392  			for _, i := range regspec.inputs {
  1393  				mask := i.regs
  1394  				if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
  1395  					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1396  				}
  1397  			}
  1398  			// Then, if an arg must be in a specific register and that
  1399  			// register is free, allocate that one. Otherwise when processing
  1400  			// another input we may kick a value into the free register, which
  1401  			// then will be kicked out again.
   1402  			// This is a common case for arguments that are passed in registers
   1403  			// to function calls.
  1404  			for {
  1405  				freed := false
  1406  				for _, i := range regspec.inputs {
  1407  					if args[i.idx] != nil {
  1408  						continue // already allocated
  1409  					}
  1410  					mask := i.regs
  1411  					if countRegs(mask) == 1 && mask&^s.used != 0 {
  1412  						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1413  						// If the input is in other registers that will be clobbered by v,
  1414  						// or the input is dead, free the registers. This may make room
  1415  						// for other inputs.
  1416  						oldregs := s.values[v.Args[i.idx].ID].regs
  1417  						if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
  1418  							s.freeRegs(oldregs &^ mask &^ s.nospill)
  1419  							freed = true
  1420  						}
  1421  					}
  1422  				}
  1423  				if !freed {
  1424  					break
  1425  				}
  1426  			}
  1427  			// Last, allocate remaining ones, in an ordering defined
  1428  			// by the register specification (most constrained first).
  1429  			for _, i := range regspec.inputs {
  1430  				if args[i.idx] != nil {
  1431  					continue // already allocated
  1432  				}
  1433  				mask := i.regs
  1434  				if mask&s.values[v.Args[i.idx].ID].regs == 0 {
  1435  					// Need a new register for the input.
  1436  					mask &= s.allocatable
  1437  					mask &^= s.nospill
   1438  					// Use the desired register if available.
  1439  					if i.idx < 3 {
  1440  						for _, r := range dinfo[idx].in[i.idx] {
  1441  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1442  								// Desired register is allowed and unused.
  1443  								mask = regMask(1) << r
  1444  								break
  1445  							}
  1446  						}
  1447  					}
  1448  					// Avoid registers we're saving for other values.
  1449  					if mask&^desired.avoid != 0 {
  1450  						mask &^= desired.avoid
  1451  					}
  1452  				}
  1453  				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1454  			}
  1455  
  1456  			// If the output clobbers the input register, make sure we have
  1457  			// at least two copies of the input register so we don't
  1458  			// have to reload the value from the spill location.
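        			// For example, a two-operand instruction like amd64's ADDQ
        			// computes x += y, destroying its first operand. If that operand
        			// is still live after v, we arrange below for a second copy of
        			// it in another register so one copy survives the instruction.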
  1459  			if opcodeTable[v.Op].resultInArg0 {
  1460  				var m regMask
  1461  				if !s.liveAfterCurrentInstruction(v.Args[0]) {
  1462  					// arg0 is dead.  We can clobber its register.
  1463  					goto ok
  1464  				}
  1465  				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
  1466  					args[0], args[1] = args[1], args[0]
  1467  					goto ok
  1468  				}
  1469  				if s.values[v.Args[0].ID].rematerializeable {
  1470  					// We can rematerialize the input, don't worry about clobbering it.
  1471  					goto ok
  1472  				}
  1473  				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
  1474  					args[0], args[1] = args[1], args[0]
  1475  					goto ok
  1476  				}
  1477  				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
  1478  					// we have at least 2 copies of arg0.  We can afford to clobber one.
  1479  					goto ok
  1480  				}
  1481  				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
  1482  					args[0], args[1] = args[1], args[0]
  1483  					goto ok
  1484  				}
  1485  
  1486  				// We can't overwrite arg0 (or arg1, if commutative).  So we
  1487  				// need to make a copy of an input so we have a register we can modify.
  1488  
  1489  				// Possible new registers to copy into.
  1490  				m = s.compatRegs(v.Args[0].Type) &^ s.used
  1491  				if m == 0 {
  1492  					// No free registers.  In this case we'll just clobber
  1493  					// an input and future uses of that input must use a restore.
  1494  					// TODO(khr): We should really do this like allocReg does it,
  1495  					// spilling the value with the most distant next use.
  1496  					goto ok
  1497  				}
  1498  
  1499  				// Try to move an input to the desired output, if allowed.
  1500  				for _, r := range dinfo[idx].out {
  1501  					if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
  1502  						m = regMask(1) << r
  1503  						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
  1504  						// Note: we update args[0] so the instruction will
  1505  						// use the register copy we just made.
  1506  						goto ok
  1507  					}
  1508  				}
  1509  				// Try to copy input to its desired location & use its old
  1510  				// location as the result register.
  1511  				for _, r := range dinfo[idx].in[0] {
  1512  					if r != noRegister && m>>r&1 != 0 {
  1513  						m = regMask(1) << r
  1514  						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1515  						s.copies[c] = false
  1516  						// Note: no update to args[0] so the instruction will
  1517  						// use the original copy.
  1518  						goto ok
  1519  					}
  1520  				}
  1521  				if opcodeTable[v.Op].commutative {
  1522  					for _, r := range dinfo[idx].in[1] {
  1523  						if r != noRegister && m>>r&1 != 0 {
  1524  							m = regMask(1) << r
  1525  							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
  1526  							s.copies[c] = false
  1527  							args[0], args[1] = args[1], args[0]
  1528  							goto ok
  1529  						}
  1530  					}
  1531  				}
  1532  				// Avoid future fixed uses if we can.
  1533  				if m&^desired.avoid != 0 {
  1534  					m &^= desired.avoid
  1535  				}
  1536  				// Save input 0 to a new register so we can clobber it.
  1537  				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1538  				s.copies[c] = false
  1539  			}
  1540  
  1541  		ok:
  1542  			// Now that all args are in regs, we're ready to issue the value itself.
  1543  			// Before we pick a register for the output value, allow input registers
  1544  			// to be deallocated. We do this here so that the output can use the
  1545  			// same register as a dying input.
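        			// For example, if v = a + b and this is the last use of a,
        			// freeing a's register here allows the output of v to be
        			// assigned to that same register.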
  1546  			if !opcodeTable[v.Op].resultNotInArgs {
  1547  				s.tmpused = s.nospill
  1548  				s.nospill = 0
  1549  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1550  			}
  1551  
  1552  			// Dump any registers which will be clobbered
  1553  			if s.doClobber && v.Op.IsCall() {
  1554  				// clobber registers that are marked as clobbered in the regmask, but
  1555  				// don't clobber inputs.
  1556  				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
  1557  			}
  1558  			s.freeRegs(regspec.clobbers)
  1559  			s.tmpused |= regspec.clobbers
  1560  
  1561  			// Pick registers for outputs.
  1562  			{
  1563  				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
  1564  				maxOutIdx := -1
  1565  				var used regMask
  1566  				for _, out := range regspec.outputs {
  1567  					mask := out.regs & s.allocatable &^ used
  1568  					if mask == 0 {
  1569  						continue
  1570  					}
  1571  					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
  1572  						if !opcodeTable[v.Op].commutative {
  1573  							// Output must use the same register as input 0.
  1574  							r := register(s.f.getHome(args[0].ID).(*Register).num)
  1575  							if mask>>r&1 == 0 {
  1576  								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
  1577  							}
  1578  							mask = regMask(1) << r
  1579  						} else {
  1580  							// Output must use the same register as input 0 or 1.
  1581  							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
  1582  							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
  1583  							// Check r0 and r1 for desired output register.
  1584  							found := false
  1585  							for _, r := range dinfo[idx].out {
  1586  								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
  1587  									mask = regMask(1) << r
  1588  									found = true
  1589  									if r == r1 {
  1590  										args[0], args[1] = args[1], args[0]
  1591  									}
  1592  									break
  1593  								}
  1594  							}
  1595  							if !found {
  1596  								// Neither is desired; pick r0.
  1597  								mask = regMask(1) << r0
  1598  							}
  1599  						}
  1600  					}
  1601  					for _, r := range dinfo[idx].out {
  1602  						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1603  							// Desired register is allowed and unused.
  1604  							mask = regMask(1) << r
  1605  							break
  1606  						}
  1607  					}
  1608  					// Avoid registers we're saving for other values.
  1609  					if mask&^desired.avoid&^s.nospill != 0 {
  1610  						mask &^= desired.avoid
  1611  					}
  1612  					r := s.allocReg(mask, v)
  1613  					if out.idx > maxOutIdx {
  1614  						maxOutIdx = out.idx
  1615  					}
  1616  					outRegs[out.idx] = r
  1617  					used |= regMask(1) << r
  1618  					s.tmpused |= regMask(1) << r
  1619  				}
  1620  				// Record register choices
  1621  				if v.Type.IsTuple() {
  1622  					var outLocs LocPair
  1623  					if r := outRegs[0]; r != noRegister {
  1624  						outLocs[0] = &s.registers[r]
  1625  					}
  1626  					if r := outRegs[1]; r != noRegister {
  1627  						outLocs[1] = &s.registers[r]
  1628  					}
  1629  					s.f.setHome(v, outLocs)
  1630  					// Note that subsequent SelectX instructions will do the assignReg calls.
  1631  				} else if v.Type.IsResults() {
  1632  					// preallocate outLocs to the right size, which is maxOutIdx+1
  1633  					outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
  1634  					for i := 0; i <= maxOutIdx; i++ {
  1635  						if r := outRegs[i]; r != noRegister {
  1636  							outLocs[i] = &s.registers[r]
  1637  						}
  1638  					}
  1639  					s.f.setHome(v, outLocs)
  1640  				} else {
  1641  					if r := outRegs[0]; r != noRegister {
  1642  						s.assignReg(r, v, v)
  1643  					}
  1644  				}
  1645  			}
  1646  
  1647  			// Deallocate dead args if we have not already done so.
  1648  			if opcodeTable[v.Op].resultNotInArgs {
  1649  				s.nospill = 0
  1650  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1651  			}
  1652  			s.tmpused = 0
  1653  
  1654  			// Issue the Value itself.
  1655  			for i, a := range args {
  1656  				v.SetArg(i, a) // use register version of arguments
  1657  			}
  1658  			b.Values = append(b.Values, v)
  1659  
  1660  		issueSpill:
  1661  		}
  1662  
  1663  		// Copy the control values - we need this so we can reduce the
  1664  		// uses property of these values later.
  1665  		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
  1666  
  1667  		// Load control values into registers.
  1668  		for i, v := range b.ControlValues() {
  1669  			if !s.values[v.ID].needReg {
  1670  				continue
  1671  			}
  1672  			if s.f.pass.debug > regDebug {
  1673  				fmt.Printf("  processing control %s\n", v.LongString())
  1674  			}
  1675  			// We assume that a control input can be passed in any
  1676  			// type-compatible register. If this turns out not to be true,
  1677  			// we'll need to introduce a regspec for a block's control value.
  1678  			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
  1679  		}
  1680  
  1681  		// Reduce the uses of the control values once registers have been loaded.
  1682  		// This loop is equivalent to the advanceUses method.
  1683  		for _, v := range controls {
  1684  			vi := &s.values[v.ID]
  1685  			if !vi.needReg {
  1686  				continue
  1687  			}
  1688  			// Remove this use from the uses list.
  1689  			u := vi.uses
  1690  			vi.uses = u.next
  1691  			if u.next == nil {
  1692  				s.freeRegs(vi.regs) // value is dead
  1693  			}
  1694  			u.next = s.freeUseRecords
  1695  			s.freeUseRecords = u
  1696  		}
  1697  
  1698  		// If we are approaching a merge point and we are the primary
  1699  		// predecessor of it, find live values that we use soon after
  1700  		// the merge point and promote them to registers now.
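        		// For example, when b falls through into a loop header, loading a
        		// value used inside the loop into a register here (in effect, in
        		// the loop preheader) avoids a reload on every iteration.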
  1701  		if len(b.Succs) == 1 {
  1702  			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
  1703  				s.freeReg(s.GReg) // Spill value in G register before any merge.
  1704  			}
  1705  			// For this to be worthwhile, the loop must have no unavoidable calls in it.
  1706  			top := b.Succs[0].b
  1707  			loop := s.loopnest.b2l[top.ID]
  1708  			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
  1709  				goto badloop
  1710  			}
  1711  
  1712  			// TODO: sort by distance, pick the closest ones?
  1713  			for _, live := range s.live[b.ID] {
  1714  				if live.dist >= unlikelyDistance {
  1715  					// Don't preload anything live after the loop.
  1716  					continue
  1717  				}
  1718  				vid := live.ID
  1719  				vi := &s.values[vid]
  1720  				if vi.regs != 0 {
  1721  					continue
  1722  				}
  1723  				if vi.rematerializeable {
  1724  					continue
  1725  				}
  1726  				v := s.orig[vid]
  1727  				m := s.compatRegs(v.Type) &^ s.used
  1728  				// Use desired register if available.
  1729  			outerloop:
  1730  				for _, e := range desired.entries {
  1731  					if e.ID != v.ID {
  1732  						continue
  1733  					}
  1734  					for _, r := range e.regs {
  1735  						if r != noRegister && m>>r&1 != 0 {
  1736  							m = regMask(1) << r
  1737  							break outerloop
  1738  						}
  1739  					}
  1740  				}
  1741  				if m&^desired.avoid != 0 {
  1742  					m &^= desired.avoid
  1743  				}
  1744  				if m != 0 {
  1745  					s.allocValToReg(v, m, false, b.Pos)
  1746  				}
  1747  			}
  1748  		}
  1749  	badloop:
  1750  		;
  1751  
  1752  		// Save end-of-block register state.
  1753  		// First count how many there are; this cuts allocations in half.
  1754  		k := 0
  1755  		for r := register(0); r < s.numRegs; r++ {
  1756  			v := s.regs[r].v
  1757  			if v == nil {
  1758  				continue
  1759  			}
  1760  			k++
  1761  		}
  1762  		regList := make([]endReg, 0, k)
  1763  		for r := register(0); r < s.numRegs; r++ {
  1764  			v := s.regs[r].v
  1765  			if v == nil {
  1766  				continue
  1767  			}
  1768  			regList = append(regList, endReg{r, v, s.regs[r].c})
  1769  		}
  1770  		s.endRegs[b.ID] = regList
  1771  
  1772  		if checkEnabled {
  1773  			regValLiveSet.clear()
  1774  			for _, x := range s.live[b.ID] {
  1775  				regValLiveSet.add(x.ID)
  1776  			}
  1777  			for r := register(0); r < s.numRegs; r++ {
  1778  				v := s.regs[r].v
  1779  				if v == nil {
  1780  					continue
  1781  				}
  1782  				if !regValLiveSet.contains(v.ID) {
  1783  					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
  1784  				}
  1785  			}
  1786  		}
  1787  
  1788  		// If a value is live at the end of the block and
  1789  		// isn't in a register, generate a use for the spill location.
  1790  		// We need to remember this information so that
  1791  		// the liveness analysis in stackalloc is correct.
  1792  		for _, e := range s.live[b.ID] {
  1793  			vi := &s.values[e.ID]
  1794  			if vi.regs != 0 {
  1795  				// in a register, we'll use that source for the merge.
  1796  				continue
  1797  			}
  1798  			if vi.rematerializeable {
  1799  				// we'll rematerialize during the merge.
  1800  				continue
  1801  			}
  1802  			if s.f.pass.debug > regDebug {
  1803  				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
  1804  			}
  1805  			spill := s.makeSpill(s.orig[e.ID], b)
  1806  			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
  1807  		}
  1808  
  1809  		// Clear any final uses.
  1810  		// All that is left should be the pseudo-uses added for values which
  1811  		// are live at the end of b.
  1812  		for _, e := range s.live[b.ID] {
  1813  			u := s.values[e.ID].uses
  1814  			if u == nil {
  1815  				f.Fatalf("live at end, no uses v%d", e.ID)
  1816  			}
  1817  			if u.next != nil {
  1818  				f.Fatalf("live at end, too many uses v%d", e.ID)
  1819  			}
  1820  			s.values[e.ID].uses = nil
  1821  			u.next = s.freeUseRecords
  1822  			s.freeUseRecords = u
  1823  		}
  1824  	}
  1825  
  1826  	// Decide where the spills we generated will go.
  1827  	s.placeSpills()
  1828  
  1829  	// Anything that didn't get a register gets a stack location here.
  1830  	// (StoreReg, stack-based phis, inputs, ...)
  1831  	stacklive := stackalloc(s.f, s.spillLive)
  1832  
  1833  	// Fix up all merge edges.
  1834  	s.shuffle(stacklive)
  1835  
  1836  	// Erase any copies we never used.
  1837  	// Also, an unused copy might be the only use of another copy,
  1838  	// so continue erasing until we reach a fixed point.
  1839  	for {
  1840  		progress := false
  1841  		for c, used := range s.copies {
  1842  			if !used && c.Uses == 0 {
  1843  				if s.f.pass.debug > regDebug {
  1844  					fmt.Printf("delete copied value %s\n", c.LongString())
  1845  				}
  1846  				c.resetArgs()
  1847  				f.freeValue(c)
  1848  				delete(s.copies, c)
  1849  				progress = true
  1850  			}
  1851  		}
  1852  		if !progress {
  1853  			break
  1854  		}
  1855  	}
  1856  
  1857  	for _, b := range s.visitOrder {
  1858  		i := 0
  1859  		for _, v := range b.Values {
  1860  			if v.Op == OpInvalid {
  1861  				continue
  1862  			}
  1863  			b.Values[i] = v
  1864  			i++
  1865  		}
  1866  		b.Values = b.Values[:i]
  1867  	}
  1868  }
  1869  
  1870  func (s *regAllocState) placeSpills() {
  1871  	mustBeFirst := func(op Op) bool {
  1872  		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
  1873  	}
  1874  
  1875  	// Start maps block IDs to the list of spills
  1876  	// that go at the start of the block (but after any phis).
  1877  	start := map[ID][]*Value{}
  1878  	// After maps value IDs to the list of spills
  1879  	// that go immediately after that value ID.
  1880  	after := map[ID][]*Value{}
  1881  
  1882  	for i := range s.values {
  1883  		vi := s.values[i]
  1884  		spill := vi.spill
  1885  		if spill == nil {
  1886  			continue
  1887  		}
  1888  		if spill.Block != nil {
  1889  			// Some spills are already fully set up,
  1890  			// like OpArgs and stack-based phis.
  1891  			continue
  1892  		}
  1893  		v := s.orig[i]
  1894  
  1895  		// Walk down the dominator tree looking for a good place to
  1896  		// put the spill of v.  At the start "best" is the best place
  1897  		// we have found so far.
  1898  		// TODO: find a way to make this O(1) without arbitrary cutoffs.
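        		// A deeper block is a better home for the spill when it still
        		// dominates all restores, is not inside a deeper loop (so the
        		// StoreReg executes less often), and has v in a register at its
        		// start (so there is a register source to store from).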
  1899  		if v == nil {
  1900  			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
  1901  		}
  1902  		best := v.Block
  1903  		bestArg := v
  1904  		var bestDepth int16
  1905  		if l := s.loopnest.b2l[best.ID]; l != nil {
  1906  			bestDepth = l.depth
  1907  		}
  1908  		b := best
  1909  		const maxSpillSearch = 100
  1910  		for i := 0; i < maxSpillSearch; i++ {
  1911  			// Find the child of b in the dominator tree which
  1912  			// dominates all restores.
  1913  			p := b
  1914  			b = nil
  1915  			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
  1916  				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
  1917  					// c also dominates all restores.  Walk down into c.
  1918  					b = c
  1919  					break
  1920  				}
  1921  			}
  1922  			if b == nil {
  1923  				// Ran out of blocks which dominate all restores.
  1924  				break
  1925  			}
  1926  
  1927  			var depth int16
  1928  			if l := s.loopnest.b2l[b.ID]; l != nil {
  1929  				depth = l.depth
  1930  			}
  1931  			if depth > bestDepth {
  1932  				// Don't push the spill into a deeper loop.
  1933  				continue
  1934  			}
  1935  
  1936  			// If v is in a register at the start of b, we can
  1937  			// place the spill here (after the phis).
  1938  			if len(b.Preds) == 1 {
  1939  				for _, e := range s.endRegs[b.Preds[0].b.ID] {
  1940  					if e.v == v {
  1941  						// Found a better spot for the spill.
  1942  						best = b
  1943  						bestArg = e.c
  1944  						bestDepth = depth
  1945  						break
  1946  					}
  1947  				}
  1948  			} else {
  1949  				for _, e := range s.startRegs[b.ID] {
  1950  					if e.v == v {
  1951  						// Found a better spot for the spill.
  1952  						best = b
  1953  						bestArg = e.c
  1954  						bestDepth = depth
  1955  						break
  1956  					}
  1957  				}
  1958  			}
  1959  		}
  1960  
  1961  		// Put the spill in the best block we found.
  1962  		spill.Block = best
  1963  		spill.AddArg(bestArg)
  1964  		if best == v.Block && !mustBeFirst(v.Op) {
  1965  			// Place immediately after v.
  1966  			after[v.ID] = append(after[v.ID], spill)
  1967  		} else {
  1968  			// Place at the start of best block.
  1969  			start[best.ID] = append(start[best.ID], spill)
  1970  		}
  1971  	}
  1972  
  1973  	// Insert spill instructions into the block schedules.
  1974  	var oldSched []*Value
  1975  	for _, b := range s.visitOrder {
  1976  		nfirst := 0
  1977  		for _, v := range b.Values {
  1978  			if !mustBeFirst(v.Op) {
  1979  				break
  1980  			}
  1981  			nfirst++
  1982  		}
  1983  		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
  1984  		b.Values = b.Values[:nfirst]
  1985  		b.Values = append(b.Values, start[b.ID]...)
  1986  		for _, v := range oldSched {
  1987  			b.Values = append(b.Values, v)
  1988  			b.Values = append(b.Values, after[v.ID]...)
  1989  		}
  1990  	}
  1991  }
  1992  
  1993  // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
  1994  func (s *regAllocState) shuffle(stacklive [][]ID) {
  1995  	var e edgeState
  1996  	e.s = s
  1997  	e.cache = map[ID][]*Value{}
  1998  	e.contents = map[Location]contentRecord{}
  1999  	if s.f.pass.debug > regDebug {
  2000  		fmt.Printf("shuffle %s\n", s.f.Name)
  2001  		fmt.Println(s.f.String())
  2002  	}
  2003  
  2004  	for _, b := range s.visitOrder {
  2005  		if len(b.Preds) <= 1 {
  2006  			continue
  2007  		}
  2008  		e.b = b
  2009  		for i, edge := range b.Preds {
  2010  			p := edge.b
  2011  			e.p = p
  2012  			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
  2013  			e.process()
  2014  		}
  2015  	}
  2016  
  2017  	if s.f.pass.debug > regDebug {
  2018  		fmt.Printf("post shuffle %s\n", s.f.Name)
  2019  		fmt.Println(s.f.String())
  2020  	}
  2021  }
  2022  
  2023  type edgeState struct {
  2024  	s    *regAllocState
  2025  	p, b *Block // edge goes from p->b.
  2026  
  2027  	// for each pre-regalloc value, a list of equivalent cached values
  2028  	cache      map[ID][]*Value
  2029  	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
  2030  
  2031  	// map from location to the value it contains
  2032  	contents map[Location]contentRecord
  2033  
  2034  	// desired destination locations
  2035  	destinations []dstRecord
  2036  	extra        []dstRecord
  2037  
  2038  	usedRegs              regMask // registers currently holding something
  2039  	uniqueRegs            regMask // registers holding the only copy of a value
  2040  	finalRegs             regMask // registers holding final target
  2041  	rematerializeableRegs regMask // registers that hold rematerializeable values
  2042  }
  2043  
  2044  type contentRecord struct {
  2045  	vid   ID       // pre-regalloc value
  2046  	c     *Value   // cached value
  2047  	final bool     // this is a satisfied destination
  2048  	pos   src.XPos // source position of use of the value
  2049  }
  2050  
  2051  type dstRecord struct {
  2052  	loc    Location // register or stack slot
  2053  	vid    ID       // pre-regalloc value it should contain
  2054  	splice **Value  // place to store reference to the generating instruction
  2055  	pos    src.XPos // source position of use of this location
  2056  }
  2057  
  2058  // setup initializes the edge state for shuffling.
  2059  func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
  2060  	if e.s.f.pass.debug > regDebug {
  2061  		fmt.Printf("edge %s->%s\n", e.p, e.b)
  2062  	}
  2063  
  2064  	// Clear state.
  2065  	for _, vid := range e.cachedVals {
  2066  		delete(e.cache, vid)
  2067  	}
  2068  	e.cachedVals = e.cachedVals[:0]
  2069  	for k := range e.contents {
  2070  		delete(e.contents, k)
  2071  	}
  2072  	e.usedRegs = 0
  2073  	e.uniqueRegs = 0
  2074  	e.finalRegs = 0
  2075  	e.rematerializeableRegs = 0
  2076  
  2077  	// Live registers can be sources.
  2078  	for _, x := range srcReg {
  2079  		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // the source position doesn't matter here
  2080  	}
  2081  	// So can all of the spill locations.
  2082  	for _, spillID := range stacklive {
  2083  		v := e.s.orig[spillID]
  2084  		spill := e.s.values[v.ID].spill
  2085  		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
  2086  			// Spills were placed that only dominate the uses found
  2087  			// during the first regalloc pass. The edge fixup code
  2088  			// can't use a spill location if the spill doesn't dominate
  2089  			// the edge.
  2090  			// We are guaranteed that if the spill doesn't dominate this edge,
  2091  			// then the value is available in a register (because we called
  2092  			// makeSpill for every value not in a register at the start
  2093  			// of an edge).
  2094  			continue
  2095  		}
  2096  		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // the source position doesn't matter here
  2097  	}
  2098  
  2099  	// Figure out all the destinations we need.
  2100  	dsts := e.destinations[:0]
  2101  	for _, x := range dstReg {
  2102  		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
  2103  	}
  2104  	// Phis need their args to end up in a specific location.
  2105  	for _, v := range e.b.Values {
  2106  		if v.Op != OpPhi {
  2107  			break
  2108  		}
  2109  		loc := e.s.f.getHome(v.ID)
  2110  		if loc == nil {
  2111  			continue
  2112  		}
  2113  		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
  2114  	}
  2115  	e.destinations = dsts
  2116  
  2117  	if e.s.f.pass.debug > regDebug {
  2118  		for _, vid := range e.cachedVals {
  2119  			a := e.cache[vid]
  2120  			for _, c := range a {
  2121  				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
  2122  			}
  2123  		}
  2124  		for _, d := range e.destinations {
  2125  			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
  2126  		}
  2127  	}
  2128  }
  2129  
  2130  // process generates code to move all the values to the right destination locations.
  2131  func (e *edgeState) process() {
  2132  	dsts := e.destinations
  2133  
  2134  	// Process the destinations until they are all satisfied.
  2135  	for len(dsts) > 0 {
  2136  		i := 0
  2137  		for _, d := range dsts {
  2138  			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
  2139  				// Failed - save for next iteration.
  2140  				dsts[i] = d
  2141  				i++
  2142  			}
  2143  		}
  2144  		if i < len(dsts) {
  2145  			// Made some progress. Go around again.
  2146  			dsts = dsts[:i]
  2147  
  2148  			// Append any extra destinations we generated.
  2149  			dsts = append(dsts, e.extra...)
  2150  			e.extra = e.extra[:0]
  2151  			continue
  2152  		}
  2153  
  2154  		// We made no progress. That means that any
  2155  		// remaining unsatisfied moves are in simple cycles.
  2156  		// For example, A -> B -> C -> D -> A.
  2157  		//   A ----> B
  2158  		//   ^       |
  2159  		//   |       |
  2160  		//   |       v
  2161  		//   D <---- C
  2162  
  2163  		// To break the cycle, we pick an unused register, say R,
  2164  		// and put a copy of B there.
  2165  		//   A ----> B
  2166  		//   ^       |
  2167  		//   |       |
  2168  		//   |       v
  2169  		//   D <---- C <---- R=copyofB
  2170  		// When we resume the outer loop, the A->B move can now proceed,
  2171  		// and eventually the whole cycle completes.
  2172  
  2173  		// Copy any cycle location to a temp register. This duplicates
  2174  		// one of the cycle entries, allowing the just duplicated value
  2175  		// to be overwritten and the cycle to proceed.
  2176  		d := dsts[0]
  2177  		loc := d.loc
  2178  		vid := e.contents[loc].vid
  2179  		c := e.contents[loc].c
  2180  		r := e.findRegFor(c.Type)
  2181  		if e.s.f.pass.debug > regDebug {
  2182  			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
  2183  		}
  2184  		e.erase(r)
  2185  		pos := d.pos.WithNotStmt()
  2186  		if _, isReg := loc.(*Register); isReg {
  2187  			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2188  		} else {
  2189  			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2190  		}
  2191  		e.set(r, vid, c, false, pos)
  2192  		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
  2193  			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
  2194  		}
  2195  	}
  2196  }
  2197  
  2198  // processDest generates code to put value vid into location loc. Returns true
  2199  // if progress was made.
  2200  func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
  2201  	pos = pos.WithNotStmt()
  2202  	occupant := e.contents[loc]
  2203  	if occupant.vid == vid {
  2204  		// Value is already in the correct place.
  2205  		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
  2206  		if splice != nil {
  2207  			(*splice).Uses--
  2208  			*splice = occupant.c
  2209  			occupant.c.Uses++
  2210  		}
  2211  		// Note: if splice==nil then c will appear dead. This is
  2212  		// non-SSA formed code, so be careful after this pass not to run
  2213  		// deadcode elimination.
  2214  		if _, ok := e.s.copies[occupant.c]; ok {
  2215  			// The copy at occupant.c was used to avoid spill.
  2216  			e.s.copies[occupant.c] = true
  2217  		}
  2218  		return true
  2219  	}
  2220  
  2221  	// Check if we're allowed to clobber the destination location.
  2222  	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
  2223  		// We can't overwrite the last copy
  2224  		// of a value that needs to survive.
  2225  		return false
  2226  	}
  2227  
  2228  	// Copy from a source of v, register preferred.
  2229  	v := e.s.orig[vid]
  2230  	var c *Value
  2231  	var src Location
  2232  	if e.s.f.pass.debug > regDebug {
  2233  		fmt.Printf("moving v%d to %s\n", vid, loc)
  2234  		fmt.Printf("sources of v%d:", vid)
  2235  	}
  2236  	for _, w := range e.cache[vid] {
  2237  		h := e.s.f.getHome(w.ID)
  2238  		if e.s.f.pass.debug > regDebug {
  2239  			fmt.Printf(" %s:%s", h, w)
  2240  		}
  2241  		_, isreg := h.(*Register)
  2242  		if src == nil || isreg {
  2243  			c = w
  2244  			src = h
  2245  		}
  2246  	}
  2247  	if e.s.f.pass.debug > regDebug {
  2248  		if src != nil {
  2249  			fmt.Printf(" [use %s]\n", src)
  2250  		} else {
  2251  			fmt.Printf(" [no source]\n")
  2252  		}
  2253  	}
  2254  	_, dstReg := loc.(*Register)
  2255  
  2256  	// Pre-clobber destination. This avoids the
  2257  	// following situation:
  2258  	//   - v is currently held in R0 and stacktmp0.
  2259  	//   - We want to copy stacktmp1 to stacktmp0.
  2260  	//   - We choose R0 as the temporary register.
  2261  	// During the copy, both R0 and stacktmp0 are
  2262  	// clobbered, losing both copies of v. Oops!
  2263  	// Erasing the destination early means R0 will not
  2264  	// be chosen as the temp register, as it will then
  2265  	// be the last copy of v.
  2266  	e.erase(loc)
  2267  	var x *Value
  2268  	if c == nil || e.s.values[vid].rematerializeable {
  2269  		if !e.s.values[vid].rematerializeable {
  2270  			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
  2271  		}
  2272  		if dstReg {
  2273  			x = v.copyInto(e.p)
  2274  		} else {
  2275  			// Rematerialize into stack slot. Need a free
  2276  			// register to accomplish this.
  2277  			r := e.findRegFor(v.Type)
  2278  			e.erase(r)
  2279  			x = v.copyIntoWithXPos(e.p, pos)
  2280  			e.set(r, vid, x, false, pos)
  2281  			// Make sure we spill with the size of the slot, not the
  2282  			// size of x (which might be wider due to our dropping
  2283  			// of narrowing conversions).
  2284  			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
  2285  		}
  2286  	} else {
  2287  		// Emit move from src to dst.
  2288  		_, srcReg := src.(*Register)
  2289  		if srcReg {
  2290  			if dstReg {
  2291  				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2292  			} else {
  2293  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
  2294  			}
  2295  		} else {
  2296  			if dstReg {
  2297  				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2298  			} else {
  2299  				// mem->mem. Use temp register.
  2300  				r := e.findRegFor(c.Type)
  2301  				e.erase(r)
  2302  				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2303  				e.set(r, vid, t, false, pos)
  2304  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
  2305  			}
  2306  		}
  2307  	}
  2308  	e.set(loc, vid, x, true, pos)
  2309  	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
  2310  		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
  2311  	}
  2312  	if splice != nil {
  2313  		(*splice).Uses--
  2314  		*splice = x
  2315  		x.Uses++
  2316  	}
  2317  	return true
  2318  }
  2319  
  2320  // set changes the contents of location loc to hold the given value and its cached representative.
  2321  func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
  2322  	e.s.f.setHome(c, loc)
  2323  	e.contents[loc] = contentRecord{vid, c, final, pos}
  2324  	a := e.cache[vid]
  2325  	if len(a) == 0 {
  2326  		e.cachedVals = append(e.cachedVals, vid)
  2327  	}
  2328  	a = append(a, c)
  2329  	e.cache[vid] = a
  2330  	if r, ok := loc.(*Register); ok {
  2331  		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
  2332  			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
  2333  		}
  2334  		e.usedRegs |= regMask(1) << uint(r.num)
  2335  		if final {
  2336  			e.finalRegs |= regMask(1) << uint(r.num)
  2337  		}
  2338  		if len(a) == 1 {
  2339  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2340  		}
  2341  		if len(a) == 2 {
  2342  			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2343  				e.uniqueRegs &^= regMask(1) << uint(t.num)
  2344  			}
  2345  		}
  2346  		if e.s.values[vid].rematerializeable {
  2347  			e.rematerializeableRegs |= regMask(1) << uint(r.num)
  2348  		}
  2349  	}
  2350  	if e.s.f.pass.debug > regDebug {
  2351  		fmt.Printf("%s\n", c.LongString())
  2352  		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
  2353  	}
  2354  }
  2355  
  2356  // erase removes the value, if any, currently cached in location loc.
  2357  func (e *edgeState) erase(loc Location) {
  2358  	cr := e.contents[loc]
  2359  	if cr.c == nil {
  2360  		return
  2361  	}
  2362  	vid := cr.vid
  2363  
  2364  	if cr.final {
  2365  		// Add a destination to move this value back into place.
  2366  		// Make sure it gets added to the tail of the destination queue
  2367  		// so we make progress on other moves first.
  2368  		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
  2369  	}
  2370  
  2371  	// Remove c from the list of cached values.
  2372  	a := e.cache[vid]
  2373  	for i, c := range a {
  2374  		if e.s.f.getHome(c.ID) == loc {
  2375  			if e.s.f.pass.debug > regDebug {
  2376  				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
  2377  			}
  2378  			a[i], a = a[len(a)-1], a[:len(a)-1]
  2379  			break
  2380  		}
  2381  	}
  2382  	e.cache[vid] = a
  2383  
  2384  	// Update register masks.
  2385  	if r, ok := loc.(*Register); ok {
  2386  		e.usedRegs &^= regMask(1) << uint(r.num)
  2387  		if cr.final {
  2388  			e.finalRegs &^= regMask(1) << uint(r.num)
  2389  		}
  2390  		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
  2391  	}
  2392  	if len(a) == 1 {
  2393  		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2394  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2395  		}
  2396  	}
  2397  }
  2398  
  2399  // findRegFor finds a register we can use to make a temp copy of type typ.
  2400  func (e *edgeState) findRegFor(typ *types.Type) Location {
  2401  	// Which registers are possibilities.
  2402  	types := &e.s.f.Config.Types
  2403  	m := e.s.compatRegs(typ)
  2404  
  2405  	// Pick a register. In priority order:
  2406  	// 1) an unused register
  2407  	// 2) a non-unique register not holding a final value
  2408  	// 3) a non-unique register
  2409  	// 4) a register holding a rematerializeable value
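        	// Earlier choices are cheaper to take over: an unused register costs
        	// nothing; a non-unique register's value survives in another location;
        	// clobbering a final register forces its move to be redone; and a
        	// rematerializeable value can be recomputed instead of saved.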
  2410  	x := m &^ e.usedRegs
  2411  	if x != 0 {
  2412  		return &e.s.registers[pickReg(x)]
  2413  	}
  2414  	x = m &^ e.uniqueRegs &^ e.finalRegs
  2415  	if x != 0 {
  2416  		return &e.s.registers[pickReg(x)]
  2417  	}
  2418  	x = m &^ e.uniqueRegs
  2419  	if x != 0 {
  2420  		return &e.s.registers[pickReg(x)]
  2421  	}
  2422  	x = m & e.rematerializeableRegs
  2423  	if x != 0 {
  2424  		return &e.s.registers[pickReg(x)]
  2425  	}
  2426  
  2427  	// No register is available.
  2428  	// Pick a register to spill.
  2429  	for _, vid := range e.cachedVals {
  2430  		a := e.cache[vid]
  2431  		for _, c := range a {
  2432  			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
  2433  				if !c.rematerializeable() {
  2434  					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
  2435  					// Allocate a temp location to spill a register to.
  2436  					// The type of the slot is immaterial - it will not be live across
  2437  					// any safepoint. Just use a type big enough to hold any register.
  2438  					t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64}
  2439  					// TODO: reuse these slots. They'll need to be erased first.
  2440  					e.set(t, vid, x, false, c.Pos)
  2441  					if e.s.f.pass.debug > regDebug {
  2442  						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
  2443  					}
  2444  				}
  2445  				// r will now be overwritten by the caller. At some point
  2446  				// later, the newly saved value will be moved back to its
  2447  				// final destination in processDest.
  2448  				return r
  2449  			}
  2450  		}
  2451  	}
  2452  
  2453  	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
  2454  	for _, vid := range e.cachedVals {
  2455  		a := e.cache[vid]
  2456  		for _, c := range a {
  2457  			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
  2458  		}
  2459  	}
  2460  	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
  2461  	return nil
  2462  }
  2463  
  2464  // rematerializeable reports whether the register allocator should recompute
  2465  // a value instead of spilling/restoring it.
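        // Typically this covers constants and other cheap, argument-free
        // computations (plus those whose only arguments are the always-available
        // SP and SB), which are cheaper to recompute at each use than to spill
        // and reload.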
  2466  func (v *Value) rematerializeable() bool {
  2467  	if !opcodeTable[v.Op].rematerializeable {
  2468  		return false
  2469  	}
  2470  	for _, a := range v.Args {
  2471  		// SP and SB (generated by OpSP and OpSB) are always available.
  2472  		if a.Op != OpSP && a.Op != OpSB {
  2473  			return false
  2474  		}
  2475  	}
  2476  	return true
  2477  }
  2478  
  2479  type liveInfo struct {
  2480  	ID   ID       // ID of value
  2481  	dist int32    // # of instructions before next use
  2482  	pos  src.XPos // source position of next use
  2483  }
  2484  
  2485  // computeLive computes a map from block ID to a list of value IDs live at the end
  2486  // of that block. Together with the value ID is a count of how many instructions
  2487  // to the next use of that value. The resulting map is stored in s.live.
  2488  // computeLive also computes the desired register information at the end of each block.
  2489  // This desired register information is stored in s.desired.
  2490  // TODO: this could be quadratic if lots of variables are live across lots of
  2491  // basic blocks. Figure out a way to make this function (or, more precisely, the user
  2492  // of this function) require only linear size & time.
  2493  func (s *regAllocState) computeLive() {
  2494  	f := s.f
  2495  	s.live = make([][]liveInfo, f.NumBlocks())
  2496  	s.desired = make([]desiredState, f.NumBlocks())
  2497  	var phis []*Value
  2498  
  2499  	live := f.newSparseMap(f.NumValues())
  2500  	defer f.retSparseMap(live)
  2501  	t := f.newSparseMap(f.NumValues())
  2502  	defer f.retSparseMap(t)
  2503  
  2504  	// Keep track of which value we want in each register.
  2505  	var desired desiredState
  2506  
  2507  	// Instead of iterating over f.Blocks, iterate over their postordering.
  2508  	// Liveness information flows backward, so starting at the end
  2509  	// increases the probability that we will stabilize quickly.
  2510  	// TODO: Do a better job yet. Here's one possibility:
  2511  	// Calculate the dominator tree and locate all strongly connected components.
  2512  	// If a value is live in one block of an SCC, it is live in all.
  2513  	// Walk the dominator tree from end to beginning, just once, treating SCC
  2514  	// components as single blocks, duplicating calculated liveness information
  2515  	// out to all of them.
  2516  	po := f.postorder()
  2517  	s.loopnest = f.loopnest()
  2518  	s.loopnest.calculateDepths()
  2519  	for {
  2520  		changed := false
  2521  
  2522  		for _, b := range po {
  2523  			// Start with known live values at the end of the block.
  2524  			// Add len(b.Values) to adjust from end-of-block distance
  2525  			// to beginning-of-block distance.
  2526  			live.clear()
  2527  			for _, e := range s.live[b.ID] {
  2528  				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
  2529  			}
  2530  
  2531  			// Mark control values as live
  2532  			for _, c := range b.ControlValues() {
  2533  				if s.values[c.ID].needReg {
  2534  					live.set(c.ID, int32(len(b.Values)), b.Pos)
  2535  				}
  2536  			}
  2537  
  2538  			// Propagate backwards to the start of the block
  2539  			// Assumes Values have been scheduled.
  2540  			phis = phis[:0]
  2541  			for i := len(b.Values) - 1; i >= 0; i-- {
  2542  				v := b.Values[i]
  2543  				live.remove(v.ID)
  2544  				if v.Op == OpPhi {
  2545  					// save phi ops for later
  2546  					phis = append(phis, v)
  2547  					continue
  2548  				}
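        				// Values live across a call will very likely be spilled
        				// around the call anyway, so treat their next use as
        				// distant to make them preferred spill candidates.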
  2549  				if opcodeTable[v.Op].call {
  2550  					c := live.contents()
  2551  					for i := range c {
  2552  						c[i].val += unlikelyDistance
  2553  					}
  2554  				}
  2555  				for _, a := range v.Args {
  2556  					if s.values[a.ID].needReg {
  2557  						live.set(a.ID, int32(i), v.Pos)
  2558  					}
  2559  				}
  2560  			}
  2561  			// Propagate desired registers backwards.
  2562  			desired.copy(&s.desired[b.ID])
  2563  			for i := len(b.Values) - 1; i >= 0; i-- {
  2564  				v := b.Values[i]
  2565  				prefs := desired.remove(v.ID)
  2566  				if v.Op == OpPhi {
  2567  					// TODO: if v is a phi, save desired register for phi inputs.
  2568  					// For now, we just drop it and don't propagate
  2569  					// desired registers back through phi nodes.
  2570  					continue
  2571  				}
  2572  				regspec := s.regspec(v)
  2573  				// Cancel desired registers if they get clobbered.
  2574  				desired.clobber(regspec.clobbers)
  2575  				// Update desired registers if there are any fixed register inputs.
  2576  				for _, j := range regspec.inputs {
  2577  					if countRegs(j.regs) != 1 {
  2578  						continue
  2579  					}
  2580  					desired.clobber(j.regs)
  2581  					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  2582  				}
  2583  				// Set desired register of input 0 (and input 1, if commutative) if this is a 2-operand instruction.
  2584  				if opcodeTable[v.Op].resultInArg0 {
  2585  					if opcodeTable[v.Op].commutative {
  2586  						desired.addList(v.Args[1].ID, prefs)
  2587  					}
  2588  					desired.addList(v.Args[0].ID, prefs)
  2589  				}
  2590  			}
  2591  
  2592  			// For each predecessor of b, expand its list of live-at-end values.
  2593  			// invariant: live contains the values live at the start of b (excluding phi inputs)
  2594  			for i, e := range b.Preds {
  2595  				p := e.b
  2596  				// Compute additional distance for the edge.
  2597  				// Note: delta must be at least 1 to distinguish the control
  2598  				// value use from the first user in a successor block.
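        				// For example, uses reached over a BranchLikely edge are
        				// charged the small likelyDistance, while uses over the
        				// unlikely edge are charged unlikelyDistance, biasing the
        				// allocator toward keeping hot-path values in registers.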
  2599  				delta := int32(normalDistance)
  2600  				if len(p.Succs) == 2 {
  2601  					if p.Succs[0].b == b && p.Likely == BranchLikely ||
  2602  						p.Succs[1].b == b && p.Likely == BranchUnlikely {
  2603  						delta = likelyDistance
  2604  					}
  2605  					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
  2606  						p.Succs[1].b == b && p.Likely == BranchLikely {
  2607  						delta = unlikelyDistance
  2608  					}
  2609  				}
  2610  
  2611  				// Update any desired registers at the end of p.
  2612  				s.desired[p.ID].merge(&desired)
  2613  
  2614  				// Start t off with the previously known live values at the end of p.
  2615  				t.clear()
  2616  				for _, e := range s.live[p.ID] {
  2617  					t.set(e.ID, e.dist, e.pos)
  2618  				}
  2619  				update := false
  2620  
  2621  				// Add new live values from scanning this block.
  2622  				for _, e := range live.contents() {
  2623  					d := e.val + delta
  2624  					if !t.contains(e.key) || d < t.get(e.key) {
  2625  						update = true
  2626  						t.set(e.key, d, e.aux)
  2627  					}
  2628  				}
  2629  				// Also add the correct arg from the saved phi values.
  2630  				// All phis are at distance delta (we consider them
  2631  				// simultaneously happening at the start of the block).
  2632  				for _, v := range phis {
  2633  					id := v.Args[i].ID
  2634  					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
  2635  						update = true
  2636  						t.set(id, delta, v.Pos)
  2637  					}
  2638  				}
  2639  
  2640  				if !update {
  2641  					continue
  2642  				}
  2643  				// The live set has changed, update it.
  2644  				l := s.live[p.ID][:0]
  2645  				if cap(l) < t.size() {
  2646  					l = make([]liveInfo, 0, t.size())
  2647  				}
  2648  				for _, e := range t.contents() {
  2649  					l = append(l, liveInfo{e.key, e.val, e.aux})
  2650  				}
  2651  				s.live[p.ID] = l
  2652  				changed = true
  2653  			}
  2654  		}
  2655  
  2656  		if !changed {
  2657  			break
  2658  		}
  2659  	}
  2660  	if f.pass.debug > regDebug {
  2661  		fmt.Println("live values at end of each block")
  2662  		for _, b := range f.Blocks {
  2663  			fmt.Printf("  %s:", b)
  2664  			for _, x := range s.live[b.ID] {
  2665  				fmt.Printf(" v%d(%d)", x.ID, x.dist)
  2666  				for _, e := range s.desired[b.ID].entries {
  2667  					if e.ID != x.ID {
  2668  						continue
  2669  					}
  2670  					fmt.Printf("[")
  2671  					first := true
  2672  					for _, r := range e.regs {
  2673  						if r == noRegister {
  2674  							continue
  2675  						}
  2676  						if !first {
  2677  							fmt.Printf(",")
  2678  						}
  2679  						fmt.Print(&s.registers[r])
  2680  						first = false
  2681  					}
  2682  					fmt.Printf("]")
  2683  				}
  2684  			}
  2685  			if avoid := s.desired[b.ID].avoid; avoid != 0 {
  2686  				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
  2687  			}
  2688  			fmt.Println()
  2689  		}
  2690  	}
  2691  }
  2692  
  2693  // A desiredState represents desired register assignments.
  2694  type desiredState struct {
  2695  	// Desired assignments will be small, so we just use a list
  2696  	// of valueID+registers entries.
  2697  	entries []desiredStateEntry
  2698  	// Registers that other values want to be in.  This value will
  2699  	// contain at least the union of the regs fields of entries, but
  2700  	// may contain additional entries for values that were once in
  2701  	// may contain additional registers for values that were once in
  2702  	avoid regMask
  2703  }
  2704  type desiredStateEntry struct {
  2705  	// (pre-regalloc) value
  2706  	ID ID
  2707  	// Registers it would like to be in, in priority order.
  2708  	// Unused slots are filled with noRegister.
  2709  	regs [4]register
  2710  }
  2711  
  2712  func (d *desiredState) clear() {
  2713  	d.entries = d.entries[:0]
  2714  	d.avoid = 0
  2715  }
  2716  
  2717  // get returns a list of desired registers for value vid.
  2718  func (d *desiredState) get(vid ID) [4]register {
  2719  	for _, e := range d.entries {
  2720  		if e.ID == vid {
  2721  			return e.regs
  2722  		}
  2723  	}
  2724  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  2725  }
  2726  
  2727  // add records that we'd like value vid to be in register r.
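        // If r is already in vid's list, it is promoted to top priority; otherwise
        // it is inserted at the front and lower-priority entries shift down.
        // For example, adding BX to [AX, BX, CX, noRegister] yields
        // [BX, AX, CX, noRegister].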
  2728  func (d *desiredState) add(vid ID, r register) {
  2729  	d.avoid |= regMask(1) << r
  2730  	for i := range d.entries {
  2731  		e := &d.entries[i]
  2732  		if e.ID != vid {
  2733  			continue
  2734  		}
  2735  		if e.regs[0] == r {
  2736  			// Already known and highest priority
  2737  			return
  2738  		}
  2739  		for j := 1; j < len(e.regs); j++ {
  2740  			if e.regs[j] == r {
  2741  				// Move from lower priority to top priority
  2742  				copy(e.regs[1:], e.regs[:j])
  2743  				e.regs[0] = r
  2744  				return
  2745  			}
  2746  		}
  2747  		copy(e.regs[1:], e.regs[:])
  2748  		e.regs[0] = r
  2749  		return
  2750  	}
  2751  	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
  2752  }
  2753  
  2754  func (d *desiredState) addList(vid ID, regs [4]register) {
  2755  	// regs is in priority order, so iterate in reverse order.
  2756  	for i := len(regs) - 1; i >= 0; i-- {
  2757  		r := regs[i]
  2758  		if r != noRegister {
  2759  			d.add(vid, r)
  2760  		}
  2761  	}
  2762  }
  2763  
  2764  // clobber erases any desired registers in the set m.
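        // Surviving registers keep their relative priority, entries left with no
        // registers are dropped, and the clobbered registers are removed from the
        // avoid set. For example, clobbering a mask containing AX turns
        // [AX, BX, noRegister, noRegister] into [BX, noRegister, noRegister, noRegister].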
  2765  func (d *desiredState) clobber(m regMask) {
  2766  	for i := 0; i < len(d.entries); {
  2767  		e := &d.entries[i]
  2768  		j := 0
  2769  		for _, r := range e.regs {
  2770  			if r != noRegister && m>>r&1 == 0 {
  2771  				e.regs[j] = r
  2772  				j++
  2773  			}
  2774  		}
  2775  		if j == 0 {
  2776  			// No more desired registers for this value.
  2777  			d.entries[i] = d.entries[len(d.entries)-1]
  2778  			d.entries = d.entries[:len(d.entries)-1]
  2779  			continue
  2780  		}
  2781  		for ; j < len(e.regs); j++ {
  2782  			e.regs[j] = noRegister
  2783  		}
  2784  		i++
  2785  	}
  2786  	d.avoid &^= m
  2787  }
  2788  
  2789  // copy copies a desired state from another desiredState x.
  2790  func (d *desiredState) copy(x *desiredState) {
  2791  	d.entries = append(d.entries[:0], x.entries...)
  2792  	d.avoid = x.avoid
  2793  }
  2794  
  2795  // remove removes the desired registers for vid and returns them.
  2796  func (d *desiredState) remove(vid ID) [4]register {
  2797  	for i := range d.entries {
  2798  		if d.entries[i].ID == vid {
  2799  			regs := d.entries[i].regs
  2800  			d.entries[i] = d.entries[len(d.entries)-1]
  2801  			d.entries = d.entries[:len(d.entries)-1]
  2802  			return regs
  2803  		}
  2804  	}
  2805  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  2806  }
  2807  
  2808  // merge merges another desired state x into d.
  2809  func (d *desiredState) merge(x *desiredState) {
  2810  	d.avoid |= x.avoid
  2811  	// There should only be a few desired registers, so
  2812  	// linear insert is ok.
  2813  	for _, e := range x.entries {
  2814  		d.addList(e.ID, e.regs)
  2815  	}
  2816  }
  2817  
  2818  func min32(x, y int32) int32 {
  2819  	if x < y {
  2820  		return x
  2821  	}
  2822  	return y
  2823  }
  2824  func max32(x, y int32) int32 {
  2825  	if x > y {
  2826  		return x
  2827  	}
  2828  	return y
  2829  }
  2830  