Source file src/cmd/compile/internal/ssa/branchelim.go

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import "cmd/internal/src"
     8  
     9  // branchelim tries to eliminate branches by
    10  // generating CondSelect instructions.
    11  //
    12  // Search for basic blocks that look like
    13  //
    14  // bb0            bb0
    15  //  | \          /   \
    16  //  | bb1  or  bb1   bb2    <- trivial if/else blocks
    17  //  | /          \   /
    18  // bb2            bb3
    19  //
    20  // where the intermediate blocks are mostly empty (with no side-effects);
    21  // rewrite Phis in the postdominator as CondSelects.
    22  func branchelim(f *Func) {
    23  	// FIXME: add support for lowering CondSelects on more architectures
    24  	switch f.Config.arch {
    25  	case "arm64", "ppc64le", "ppc64", "amd64", "wasm":
    26  		// implemented
    27  	default:
    28  		return
    29  	}
    30  
    31  	// Find all the values used in computing the address of any load.
    32  	// Typically these values have operations like AddPtr, Lsh64x64, etc.
    33  	loadAddr := f.newSparseSet(f.NumValues())
    34  	defer f.retSparseSet(loadAddr)
    35  	for _, b := range f.Blocks {
    36  		for _, v := range b.Values {
    37  			switch v.Op {
    38  			case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64:
    39  				loadAddr.add(v.Args[0].ID)
    40  			case OpMove:
    41  				loadAddr.add(v.Args[1].ID)
    42  			}
    43  		}
    44  	}
    45  	po := f.postorder()
    46  	for {
    47  		n := loadAddr.size()
    48  		for _, b := range po {
    49  			for i := len(b.Values) - 1; i >= 0; i-- {
    50  				v := b.Values[i]
    51  				if !loadAddr.contains(v.ID) {
    52  					continue
    53  				}
    54  				for _, a := range v.Args {
    55  					if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
    56  						loadAddr.add(a.ID)
    57  					}
    58  				}
    59  			}
    60  		}
    61  		if loadAddr.size() == n {
    62  			break
    63  		}
    64  	}
    65  
    66  	change := true
    67  	for change {
    68  		change = false
    69  		for _, b := range f.Blocks {
    70  			change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
    71  		}
    72  	}
    73  }
    74  
    75  func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
    76  	if loadAddr.contains(v.ID) {
    77  		// The result of the soon-to-be conditional move is used to compute a load address.
    78  		// We want to avoid generating a conditional move in this case
    79  		// because the load address would now be data-dependent on the condition.
    80  		// Previously it would only be control-dependent on the condition, which is faster
    81  		// if the branch predicts well (or possibly even if it doesn't, if the load will
    82  		// be an expensive cache miss).
    83  		// See issue #26306.
    84  		return false
    85  	}
    86  	// For now, stick to simple scalars that fit in registers
    87  	switch {
    88  	case v.Type.Size() > v.Block.Func.Config.RegSize:
    89  		return false
    90  	case v.Type.IsPtrShaped():
    91  		return true
    92  	case v.Type.IsInteger():
    93  		if arch == "amd64" && v.Type.Size() < 2 {
    94  			// amd64 doesn't support CMOV with byte registers
    95  			return false
    96  		}
    97  		return true
    98  	default:
    99  		return false
   100  	}
   101  }
   102  
   103  // elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
   104  // loadAddr is a set of values which are used to compute the address of a load.
   105  // Those values are exempt from CMOV generation.
   106  func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
   107  	// See if dom is an If with one arm that
   108  	// is trivial and succeeded by the other
   109  	// successor of dom.
   110  	if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
   111  		return false
   112  	}
   113  	var simple, post *Block
   114  	for i := range dom.Succs {
   115  		bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
   116  		if isLeafPlain(bb) && bb.Succs[0].Block() == other {
   117  			simple = bb
   118  			post = other
   119  			break
   120  		}
   121  	}
   122  	if simple == nil || len(post.Preds) != 2 || post == dom {
   123  		return false
   124  	}
   125  
   126  	// We've found our diamond CFG of blocks.
   127  	// Now decide if fusing 'simple' into dom+post
   128  	// looks profitable.
   129  
   130  	// Check that there are Phis, and that all of them
   131  	// can be safely rewritten to CondSelect.
   132  	hasphis := false
   133  	for _, v := range post.Values {
   134  		if v.Op == OpPhi {
   135  			hasphis = true
   136  			if !canCondSelect(v, f.Config.arch, loadAddr) {
   137  				return false
   138  			}
   139  		}
   140  	}
   141  	if !hasphis {
   142  		return false
   143  	}
   144  
   145  	// Pick some upper bound for the number of instructions
   146  	// we'd be willing to execute just to generate a dead
   147  	// argument to CondSelect. In the worst case, this is
   148  	// the number of useless instructions executed.
   149  	const maxfuseinsts = 2
   150  
   151  	if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
   152  		return false
   153  	}
   154  
   155  	// Replace Phi instructions in b with CondSelect instructions
   156  	swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
   157  	for _, v := range post.Values {
   158  		if v.Op != OpPhi {
   159  			continue
   160  		}
   161  		v.Op = OpCondSelect
   162  		if swap {
   163  			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   164  		}
   165  		v.AddArg(dom.Controls[0])
   166  	}
   167  
   168  	// Put all of the instructions into 'dom'
   169  	// and update the CFG appropriately.
   170  	dom.Kind = post.Kind
   171  	dom.CopyControls(post)
   172  	dom.Aux = post.Aux
   173  	dom.Succs = append(dom.Succs[:0], post.Succs...)
   174  	for i := range dom.Succs {
   175  		e := dom.Succs[i]
   176  		e.b.Preds[e.i].b = dom
   177  	}
   178  
   179  	// Try really hard to preserve statement marks attached to blocks.
   180  	simplePos := simple.Pos
   181  	postPos := post.Pos
   182  	simpleStmt := simplePos.IsStmt() == src.PosIsStmt
   183  	postStmt := postPos.IsStmt() == src.PosIsStmt
   184  
   185  	for _, v := range simple.Values {
   186  		v.Block = dom
   187  	}
   188  	for _, v := range post.Values {
   189  		v.Block = dom
   190  	}
   191  
   192  	// findBlockPos determines if b contains a stmt-marked value
   193  	// that has the same line number as the Pos for b itself.
   194  	// (i.e. is the position on b actually redundant?)
   195  	findBlockPos := func(b *Block) bool {
   196  		pos := b.Pos
   197  		for _, v := range b.Values {
   198  			// See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
   199  			if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
   200  				return true
   201  			}
   202  		}
   203  		return false
   204  	}
   205  	if simpleStmt {
   206  		simpleStmt = !findBlockPos(simple)
   207  		if !simpleStmt && simplePos.SameFileAndLine(postPos) {
   208  			postStmt = false
   209  		}
   210  
   211  	}
   212  	if postStmt {
   213  		postStmt = !findBlockPos(post)
   214  	}
   215  
   216  	// If simpleStmt and/or postStmt are still true, then try harder
   217  	// to find the corresponding statement marks new homes.
   218  
   219  	// setBlockPos determines if b contains a can-be-statement value
   220  	// that has the same line number as the Pos for b itself, and
   221  	// puts a statement mark on it, and returns whether it succeeded
   222  	// in this operation.
   223  	setBlockPos := func(b *Block) bool {
   224  		pos := b.Pos
   225  		for _, v := range b.Values {
   226  			if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
   227  				v.Pos = v.Pos.WithIsStmt()
   228  				return true
   229  			}
   230  		}
   231  		return false
   232  	}
   233  	// If necessary and possible, add a mark to a value in simple
   234  	if simpleStmt {
   235  		if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
   236  			postStmt = false
   237  		}
   238  	}
   239  	// If necessary and possible, add a mark to a value in post
   240  	if postStmt {
   241  		postStmt = !setBlockPos(post)
   242  	}
   243  
   244  	// Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
   245  	// try the values in the successor block if it is uncomplicated.
   246  	if postStmt {
   247  		if dom.Pos.IsStmt() != src.PosIsStmt {
   248  			dom.Pos = postPos
   249  		} else {
   250  			// Try the successor block
   251  			if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
   252  				succ := dom.Succs[0].Block()
   253  				for _, v := range succ.Values {
   254  					if isPoorStatementOp(v.Op) {
   255  						continue
   256  					}
   257  					if postPos.SameFileAndLine(v.Pos) {
   258  						v.Pos = v.Pos.WithIsStmt()
   259  					}
   260  					postStmt = false
   261  					break
   262  				}
   263  				// If postStmt still true, tag the block itself if possible
   264  				if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
   265  					succ.Pos = postPos
   266  				}
   267  			}
   268  		}
   269  	}
   270  
   271  	dom.Values = append(dom.Values, simple.Values...)
   272  	dom.Values = append(dom.Values, post.Values...)
   273  
   274  	// Trash 'post' and 'simple'
   275  	clobberBlock(post)
   276  	clobberBlock(simple)
   277  
   278  	f.invalidateCFG()
   279  	return true
   280  }
   281  
   282  // is this a BlockPlain with one predecessor?
   283  func isLeafPlain(b *Block) bool {
   284  	return b.Kind == BlockPlain && len(b.Preds) == 1
   285  }
   286  
   287  func clobberBlock(b *Block) {
   288  	b.Values = nil
   289  	b.Preds = nil
   290  	b.Succs = nil
   291  	b.Aux = nil
   292  	b.ResetControls()
   293  	b.Likely = BranchUnknown
   294  	b.Kind = BlockInvalid
   295  }
   296  
   297  // elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
   298  // loadAddr is a set of values which are used to compute the address of a load.
   299  // Those values are exempt from CMOV generation.
   300  func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
   301  	// See if 'b' ends in an if/else: it should
   302  	// have two successors, both of which are BlockPlain
   303  	// and succeeded by the same block.
   304  	if b.Kind != BlockIf || b.Likely != BranchUnknown {
   305  		return false
   306  	}
   307  	yes, no := b.Succs[0].Block(), b.Succs[1].Block()
   308  	if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
   309  		return false
   310  	}
   311  	if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
   312  		return false
   313  	}
   314  	if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
   315  		return false
   316  	}
   317  	// block that postdominates the if/else
   318  	post := b.Succs[0].Block().Succs[0].Block()
   319  	if len(post.Preds) != 2 || post == b {
   320  		return false
   321  	}
   322  	hasphis := false
   323  	for _, v := range post.Values {
   324  		if v.Op == OpPhi {
   325  			hasphis = true
   326  			if !canCondSelect(v, f.Config.arch, loadAddr) {
   327  				return false
   328  			}
   329  		}
   330  	}
   331  	if !hasphis {
   332  		return false
   333  	}
   334  
   335  	// Don't generate CondSelects if branch is cheaper.
   336  	if !shouldElimIfElse(no, yes, post, f.Config.arch) {
   337  		return false
   338  	}
   339  
   340  	// now we're committed: rewrite each Phi as a CondSelect
   341  	swap := post.Preds[0].Block() != b.Succs[0].Block()
   342  	for _, v := range post.Values {
   343  		if v.Op != OpPhi {
   344  			continue
   345  		}
   346  		v.Op = OpCondSelect
   347  		if swap {
   348  			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   349  		}
   350  		v.AddArg(b.Controls[0])
   351  	}
   352  
   353  	// Move the contents of all of these
   354  	// blocks into 'b' and update CFG edges accordingly
   355  	b.Kind = post.Kind
   356  	b.CopyControls(post)
   357  	b.Aux = post.Aux
   358  	b.Succs = append(b.Succs[:0], post.Succs...)
   359  	for i := range b.Succs {
   360  		e := b.Succs[i]
   361  		e.b.Preds[e.i].b = b
   362  	}
   363  	for i := range post.Values {
   364  		post.Values[i].Block = b
   365  	}
   366  	for i := range yes.Values {
   367  		yes.Values[i].Block = b
   368  	}
   369  	for i := range no.Values {
   370  		no.Values[i].Block = b
   371  	}
   372  	b.Values = append(b.Values, yes.Values...)
   373  	b.Values = append(b.Values, no.Values...)
   374  	b.Values = append(b.Values, post.Values...)
   375  
   376  	// trash post, yes, and no
   377  	clobberBlock(yes)
   378  	clobberBlock(no)
   379  	clobberBlock(post)
   380  
   381  	f.invalidateCFG()
   382  	return true
   383  }
   384  
   385  // shouldElimIfElse reports whether estimated cost of eliminating branch
   386  // is lower than threshold.
   387  func shouldElimIfElse(no, yes, post *Block, arch string) bool {
   388  	switch arch {
   389  	default:
   390  		return true
   391  	case "amd64":
   392  		const maxcost = 2
   393  		phi := 0
   394  		other := 0
   395  		for _, v := range post.Values {
   396  			if v.Op == OpPhi {
   397  				// Each phi results in CondSelect, which lowers into CMOV,
   398  				// CMOV has latency >1 on most CPUs.
   399  				phi++
   400  			}
   401  			for _, x := range v.Args {
   402  				if x.Block == no || x.Block == yes {
   403  					other++
   404  				}
   405  			}
   406  		}
   407  		cost := phi * 1
   408  		if phi > 1 {
   409  			// If we have more than 1 phi and some values in post have args
   410  			// in yes or no blocks, we may have to recalculate condition, because
   411  			// those args may clobber flags. For now assume that all operations clobber flags.
   412  			cost += other * 1
   413  		}
   414  		return cost < maxcost
   415  	}
   416  }
   417  
   418  // canSpeculativelyExecute reports whether every value in the block can
   419  // be evaluated without causing any observable side effects (memory
   420  // accesses, panics and so on) except for execution time changes. It
   421  // also ensures that the block does not contain any phis which we can't
   422  // speculatively execute.
   423  // Warning: this function cannot currently detect values that represent
   424  // instructions the execution of which need to be guarded with CPU
   425  // hardware feature checks. See issue #34950.
   426  func canSpeculativelyExecute(b *Block) bool {
   427  	// don't fuse memory ops, Phi ops, divides (can panic),
   428  	// or anything else with side-effects
   429  	for _, v := range b.Values {
   430  		if v.Op == OpPhi || isDivMod(v.Op) || v.Type.IsMemory() ||
   431  			v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
   432  			return false
   433  		}
   434  	}
   435  	return true
   436  }
   437  
   438  func isDivMod(op Op) bool {
   439  	switch op {
   440  	case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
   441  		OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
   442  		OpDiv32F, OpDiv64F,
   443  		OpMod8, OpMod8u, OpMod16, OpMod16u,
   444  		OpMod32, OpMod32u, OpMod64, OpMod64u:
   445  		return true
   446  	default:
   447  		return false
   448  	}
   449  }
   450  

View as plain text