Source file src/cmd/link/internal/ld/deadcode.go

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ld
     6  
import (
	"cmd/internal/goobj"
	"cmd/internal/objabi"
	"cmd/internal/sys"
	"cmd/link/internal/loader"
	"cmd/link/internal/sym"
	"fmt"
	"internal/buildcfg"
	"unicode"
	"unicode/utf8"
)
    17  
// Reference fmt unconditionally so that the import is always used.
// NOTE(review): presumably kept so temporary debug prints can be added and
// removed freely; the file also calls fmt directly elsewhere.
var _ = fmt.Print
    19  
// deadcodePass carries the state of one run of the dead code pass
// driven by deadcode: the work queue of symbols still to visit and the
// bookkeeping needed to decide which methods must be kept.
type deadcodePass struct {
	// ctxt is the link context consulted for build mode, link mode and logging.
	ctxt *Link
	// ldr is the loader whose Reachable and UsedInIface symbol attributes
	// this pass reads and updates.
	ldr  *loader.Loader
	wq   heap // work queue, using min-heap for better locality

	ifaceMethod        map[methodsig]bool // methods called from reached interface call sites
	genericIfaceMethod map[string]bool    // names of methods called from reached generic interface call sites
	markableMethods    []methodref        // methods of reached types
	reflectSeen        bool               // whether we have seen a reflect method call
	// dynlink caches ctxt.DynlinkingGo(); it is set once in init.
	dynlink            bool

	// methodsigstmp is reused by decodeMethodSig, so a slice returned from
	// that method is only valid until the next call.
	methodsigstmp []methodsig // scratch buffer for decoding method signatures
}
    33  
    34  func (d *deadcodePass) init() {
    35  	d.ldr.InitReachable()
    36  	d.ifaceMethod = make(map[methodsig]bool)
    37  	d.genericIfaceMethod = make(map[string]bool)
    38  	if buildcfg.Experiment.FieldTrack {
    39  		d.ldr.Reachparent = make([]loader.Sym, d.ldr.NSym())
    40  	}
    41  	d.dynlink = d.ctxt.DynlinkingGo()
    42  
    43  	if d.ctxt.BuildMode == BuildModeShared {
    44  		// Mark all symbols defined in this library as reachable when
    45  		// building a shared library.
    46  		n := d.ldr.NDef()
    47  		for i := 1; i < n; i++ {
    48  			s := loader.Sym(i)
    49  			d.mark(s, 0)
    50  		}
    51  		return
    52  	}
    53  
    54  	var names []string
    55  
    56  	// In a normal binary, start at main.main and the init
    57  	// functions and mark what is reachable from there.
    58  	if d.ctxt.linkShared && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
    59  		names = append(names, "main.main", "main..inittask")
    60  	} else {
    61  		// The external linker refers main symbol directly.
    62  		if d.ctxt.LinkMode == LinkExternal && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
    63  			if d.ctxt.HeadType == objabi.Hwindows && d.ctxt.Arch.Family == sys.I386 {
    64  				*flagEntrySymbol = "_main"
    65  			} else {
    66  				*flagEntrySymbol = "main"
    67  			}
    68  		}
    69  		names = append(names, *flagEntrySymbol)
    70  	}
    71  	// runtime.unreachableMethod is a function that will throw if called.
    72  	// We redirect unreachable methods to it.
    73  	names = append(names, "runtime.unreachableMethod")
    74  	if d.ctxt.BuildMode == BuildModePlugin {
    75  		names = append(names, objabi.PathToPrefix(*flagPluginPath)+"..inittask", objabi.PathToPrefix(*flagPluginPath)+".main", "go.plugin.tabs")
    76  
    77  		// We don't keep the go.plugin.exports symbol,
    78  		// but we do keep the symbols it refers to.
    79  		exportsIdx := d.ldr.Lookup("go.plugin.exports", 0)
    80  		if exportsIdx != 0 {
    81  			relocs := d.ldr.Relocs(exportsIdx)
    82  			for i := 0; i < relocs.Count(); i++ {
    83  				d.mark(relocs.At(i).Sym(), 0)
    84  			}
    85  		}
    86  	}
    87  
    88  	if d.ctxt.Debugvlog > 1 {
    89  		d.ctxt.Logf("deadcode start names: %v\n", names)
    90  	}
    91  
    92  	for _, name := range names {
    93  		// Mark symbol as a data/ABI0 symbol.
    94  		d.mark(d.ldr.Lookup(name, 0), 0)
    95  		if abiInternalVer != 0 {
    96  			// Also mark any Go functions (internal ABI).
    97  			d.mark(d.ldr.Lookup(name, abiInternalVer), 0)
    98  		}
    99  	}
   100  
   101  	// All dynamic exports are roots.
   102  	for _, s := range d.ctxt.dynexp {
   103  		if d.ctxt.Debugvlog > 1 {
   104  			d.ctxt.Logf("deadcode start dynexp: %s<%d>\n", d.ldr.SymName(s), d.ldr.SymVersion(s))
   105  		}
   106  		d.mark(s, 0)
   107  	}
   108  }
   109  
// flood drains the work queue. For every popped symbol it marks the
// symbols referenced by its relocations, its aux symbols (except the Go
// type aux) and, for external symbols, its outer and sub symbols.
//
// Method relocations (R_METHODOFF) are not followed here. For a type
// that has UsedInIface set they are recorded in d.markableMethods,
// together with their decoded signatures, so that deadcode can decide
// later which of them to keep. Interface method uses discovered through
// R_USEIFACEMETHOD and R_USEGENERICIFACEMETHOD are recorded in
// d.ifaceMethod and d.genericIfaceMethod.
func (d *deadcodePass) flood() {
	// methods collects the method relocations of the symbol currently
	// being visited; its storage is reused from one symbol to the next.
	var methods []methodref
	for !d.wq.empty() {
		symIdx := d.wq.pop()

		d.reflectSeen = d.reflectSeen || d.ldr.IsReflectMethod(symIdx)

		isgotype := d.ldr.IsGoType(symIdx)
		relocs := d.ldr.Relocs(symIdx)
		var usedInIface bool

		if isgotype {
			if d.dynlink {
				// When dynamic linking, a type may be passed across DSO
				// boundary and get converted to interface at the other side.
				d.ldr.SetAttrUsedInIface(symIdx, true)
			}
			usedInIface = d.ldr.AttrUsedInIface(symIdx)
		}

		methods = methods[:0]
		for i := 0; i < relocs.Count(); i++ {
			r := relocs.At(i)
			// When building with "-linkshared", we can't tell if the interface
			// method in itab will be used or not. Ignore the weak attribute.
			if r.Weak() && !(d.ctxt.linkShared && d.ldr.IsItab(symIdx)) {
				continue
			}
			t := r.Type()
			switch t {
			case objabi.R_METHODOFF:
				// Method relocations come in groups of three (see
				// methodref); this one must be followed by two more.
				if i+2 >= relocs.Count() {
					panic("expect three consecutive R_METHODOFF relocs")
				}
				if usedInIface {
					methods = append(methods, methodref{src: symIdx, r: i})
					// The method descriptor is itself a type descriptor, and
					// it can be used to reach other types, e.g. by using
					// reflect.Type.Method(i).Type.In(j). We need to traverse
					// its child types with UsedInIface set. (See also the
					// comment below.)
					rs := r.Sym()
					if !d.ldr.AttrUsedInIface(rs) {
						d.ldr.SetAttrUsedInIface(rs, true)
						if d.ldr.AttrReachable(rs) {
							d.ldr.SetAttrReachable(rs, false)
							d.mark(rs, symIdx)
						}
					}
				}
				// Skip the other two relocations of this group.
				i += 2
				continue
			case objabi.R_USETYPE:
				// type symbol used for DWARF. we need to load the symbol but it may not
				// be otherwise reachable in the program.
				// do nothing for now as we still load all type symbols.
				continue
			case objabi.R_USEIFACE:
				// R_USEIFACE is a marker relocation that tells the linker the type is
				// converted to an interface, i.e. should have UsedInIface set. See the
				// comment below for why we need to unset the Reachable bit and re-mark it.
				rs := r.Sym()
				if !d.ldr.AttrUsedInIface(rs) {
					d.ldr.SetAttrUsedInIface(rs, true)
					if d.ldr.AttrReachable(rs) {
						d.ldr.SetAttrReachable(rs, false)
						d.mark(rs, symIdx)
					}
				}
				continue
			case objabi.R_USEIFACEMETHOD:
				// R_USEIFACEMETHOD is a marker relocation that marks an interface
				// method as used.
				rs := r.Sym()
				if d.ctxt.linkShared && (d.ldr.SymType(rs) == sym.SDYNIMPORT || d.ldr.SymType(rs) == sym.Sxxx) {
					// Don't decode symbol from shared library (we'll mark all exported methods anyway).
					// We check for both SDYNIMPORT and Sxxx because name-mangled symbols haven't
					// been resolved at this point.
					continue
				}
				m := d.decodeIfaceMethod(d.ldr, d.ctxt.Arch, rs, r.Add())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached iface method: %v\n", m)
				}
				d.ifaceMethod[m] = true
				continue
			case objabi.R_USEGENERICIFACEMETHOD:
				// The referenced symbol's data is the method name; only the
				// name is recorded.
				name := d.decodeGenericIfaceMethod(d.ldr, r.Sym())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached generic iface method: %s\n", name)
				}
				d.genericIfaceMethod[name] = true
				continue // don't mark referenced symbol - it is not needed in the final binary.
			}
			rs := r.Sym()
			if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) {
				// If a type is converted to an interface, it is possible to obtain an
				// interface with a "child" type of it using reflection (e.g. obtain an
				// interface of T from []chan T). We need to traverse its "child" types
				// with UsedInIface attribute set.
				// When visiting the child type (chan T in the example above), it will
				// have UsedInIface set, so it in turn will mark and (re)visit its children
				// (e.g. T above).
				// We unset the reachable bit here, so if the child type is already visited,
				// it will be visited again.
				// Note that a type symbol can be visited at most twice, one without
				// UsedInIface and one with. So termination is still guaranteed.
				d.ldr.SetAttrUsedInIface(rs, true)
				d.ldr.SetAttrReachable(rs, false)
			}
			d.mark(rs, symIdx)
		}
		naux := d.ldr.NAux(symIdx)
		for i := 0; i < naux; i++ {
			a := d.ldr.Aux(symIdx, i)
			if a.Type() == goobj.AuxGotype {
				// A symbol being reachable doesn't imply we need its
				// type descriptor. Don't mark it.
				continue
			}
			d.mark(a.Sym(), symIdx)
		}
		// Some host object symbols have an outer object, which acts like a
		// "carrier" symbol, or it holds all the symbols for a particular
		// section. We need to mark all "referenced" symbols from that carrier,
		// so we make sure we're pulling in all outer symbols, and their sub
		// symbols. This is not ideal, and these carrier/section symbols could
		// be removed.
		if d.ldr.IsExternal(symIdx) {
			d.mark(d.ldr.OuterSym(symIdx), symIdx)
			d.mark(d.ldr.SubSym(symIdx), symIdx)
		}

		if len(methods) != 0 {
			if !isgotype {
				panic("method found on non-type symbol")
			}
			// Decode runtime type information for type methods
			// to help work out which methods can be called
			// dynamically via interfaces.
			methodsigs := d.decodetypeMethods(d.ldr, d.ctxt.Arch, symIdx, &relocs)
			if len(methods) != len(methodsigs) {
				panic(fmt.Sprintf("%q has %d method relocations for %d methods", d.ldr.SymName(symIdx), len(methods), len(methodsigs)))
			}
			// Pair each recorded relocation group with its signature by
			// position; the signatures are copied out of the decoder's
			// scratch buffer here.
			for i, m := range methodsigs {
				methods[i].m = m
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("markable method: %v of sym %v %s\n", m, symIdx, d.ldr.SymName(symIdx))
				}
			}
			d.markableMethods = append(d.markableMethods, methods...)
		}
	}
}
   264  
   265  func (d *deadcodePass) mark(symIdx, parent loader.Sym) {
   266  	if symIdx != 0 && !d.ldr.AttrReachable(symIdx) {
   267  		d.wq.push(symIdx)
   268  		d.ldr.SetAttrReachable(symIdx, true)
   269  		if buildcfg.Experiment.FieldTrack && d.ldr.Reachparent[symIdx] == 0 {
   270  			d.ldr.Reachparent[symIdx] = parent
   271  		}
   272  		if *flagDumpDep {
   273  			to := d.ldr.SymName(symIdx)
   274  			if to != "" {
   275  				if d.ldr.AttrUsedInIface(symIdx) {
   276  					to += " <UsedInIface>"
   277  				}
   278  				from := "_"
   279  				if parent != 0 {
   280  					from = d.ldr.SymName(parent)
   281  					if d.ldr.AttrUsedInIface(parent) {
   282  						from += " <UsedInIface>"
   283  					}
   284  				}
   285  				fmt.Printf("%s -> %s\n", from, to)
   286  			}
   287  		}
   288  	}
   289  }
   290  
   291  func (d *deadcodePass) markMethod(m methodref) {
   292  	relocs := d.ldr.Relocs(m.src)
   293  	d.mark(relocs.At(m.r).Sym(), m.src)
   294  	d.mark(relocs.At(m.r+1).Sym(), m.src)
   295  	d.mark(relocs.At(m.r+2).Sym(), m.src)
   296  }
   297  
   298  // deadcode marks all reachable symbols.
   299  //
   300  // The basis of the dead code elimination is a flood fill of symbols,
   301  // following their relocations, beginning at *flagEntrySymbol.
   302  //
   303  // This flood fill is wrapped in logic for pruning unused methods.
   304  // All methods are mentioned by relocations on their receiver's *rtype.
   305  // These relocations are specially defined as R_METHODOFF by the compiler
   306  // so we can detect and manipulated them here.
   307  //
   308  // There are three ways a method of a reachable type can be invoked:
   309  //
   310  //	1. direct call
   311  //	2. through a reachable interface type
   312  //	3. reflect.Value.Method (or MethodByName), or reflect.Type.Method
   313  //	   (or MethodByName)
   314  //
   315  // The first case is handled by the flood fill, a directly called method
   316  // is marked as reachable.
   317  //
   318  // The second case is handled by decomposing all reachable interface
   319  // types into method signatures. Each encountered method is compared
   320  // against the interface method signatures, if it matches it is marked
   321  // as reachable. This is extremely conservative, but easy and correct.
   322  //
   323  // The third case is handled by looking to see if any of:
   324  //	- reflect.Value.Method or MethodByName is reachable
   325  // 	- reflect.Type.Method or MethodByName is called (through the
   326  // 	  REFLECTMETHOD attribute marked by the compiler).
   327  // If any of these happen, all bets are off and all exported methods
   328  // of reachable types are marked reachable.
   329  //
   330  // Any unreached text symbols are removed from ctxt.Textp.
   331  func deadcode(ctxt *Link) {
   332  	ldr := ctxt.loader
   333  	d := deadcodePass{ctxt: ctxt, ldr: ldr}
   334  	d.init()
   335  	d.flood()
   336  
   337  	methSym := ldr.Lookup("reflect.Value.Method", abiInternalVer)
   338  	methByNameSym := ldr.Lookup("reflect.Value.MethodByName", abiInternalVer)
   339  
   340  	if ctxt.DynlinkingGo() {
   341  		// Exported methods may satisfy interfaces we don't know
   342  		// about yet when dynamically linking.
   343  		d.reflectSeen = true
   344  	}
   345  
   346  	for {
   347  		// Methods might be called via reflection. Give up on
   348  		// static analysis, mark all exported methods of
   349  		// all reachable types as reachable.
   350  		d.reflectSeen = d.reflectSeen || (methSym != 0 && ldr.AttrReachable(methSym)) || (methByNameSym != 0 && ldr.AttrReachable(methByNameSym))
   351  
   352  		// Mark all methods that could satisfy a discovered
   353  		// interface as reachable. We recheck old marked interfaces
   354  		// as new types (with new methods) may have been discovered
   355  		// in the last pass.
   356  		rem := d.markableMethods[:0]
   357  		for _, m := range d.markableMethods {
   358  			if (d.reflectSeen && (m.isExported() || d.dynlink)) || d.ifaceMethod[m.m] || d.genericIfaceMethod[m.m.name] {
   359  				d.markMethod(m)
   360  			} else {
   361  				rem = append(rem, m)
   362  			}
   363  		}
   364  		d.markableMethods = rem
   365  
   366  		if d.wq.empty() {
   367  			// No new work was discovered. Done.
   368  			break
   369  		}
   370  		d.flood()
   371  	}
   372  }
   373  
// methodsig is a typed method signature (name + type).
// It is comparable and is used as the key of deadcodePass.ifaceMethod.
type methodsig struct {
	name string     // method name as decoded from the type data
	typ  loader.Sym // type descriptor symbol of the function
}
   379  
// methodref holds the relocations from a receiver type symbol to its
// method. There are three relocations, one for each of the fields in
// the reflect.method struct: mtyp, ifn, and tfn.
//
// The three relocations are consecutive in src's relocation list; r is
// the index of the first one, and markMethod marks the targets of r,
// r+1 and r+2.
type methodref struct {
	m   methodsig  // signature of the method, filled in by flood after decoding
	src loader.Sym // receiver type symbol
	r   int        // the index of R_METHODOFF relocations
}
   388  
   389  func (m methodref) isExported() bool {
   390  	for _, r := range m.m.name {
   391  		return unicode.IsUpper(r)
   392  	}
   393  	panic("methodref has no signature")
   394  }
   395  
   396  // decodeMethodSig decodes an array of method signature information.
   397  // Each element of the array is size bytes. The first 4 bytes is a
   398  // nameOff for the method name, and the next 4 bytes is a typeOff for
   399  // the function type.
   400  //
   401  // Conveniently this is the layout of both runtime.method and runtime.imethod.
   402  func (d *deadcodePass) decodeMethodSig(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs, off, size, count int) []methodsig {
   403  	if cap(d.methodsigstmp) < count {
   404  		d.methodsigstmp = append(d.methodsigstmp[:0], make([]methodsig, count)...)
   405  	}
   406  	var methods = d.methodsigstmp[:count]
   407  	for i := 0; i < count; i++ {
   408  		methods[i].name = decodetypeName(ldr, symIdx, relocs, off)
   409  		methods[i].typ = decodeRelocSym(ldr, symIdx, relocs, int32(off+4))
   410  		off += size
   411  	}
   412  	return methods
   413  }
   414  
   415  // Decode the method of interface type symbol symIdx at offset off.
   416  func (d *deadcodePass) decodeIfaceMethod(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, off int64) methodsig {
   417  	p := ldr.Data(symIdx)
   418  	if p == nil {
   419  		panic(fmt.Sprintf("missing symbol %q", ldr.SymName(symIdx)))
   420  	}
   421  	if decodetypeKind(arch, p)&kindMask != kindInterface {
   422  		panic(fmt.Sprintf("symbol %q is not an interface", ldr.SymName(symIdx)))
   423  	}
   424  	relocs := ldr.Relocs(symIdx)
   425  	var m methodsig
   426  	m.name = decodetypeName(ldr, symIdx, &relocs, int(off))
   427  	m.typ = decodeRelocSym(ldr, symIdx, &relocs, int32(off+4))
   428  	return m
   429  }
   430  
   431  // Decode the method name stored in symbol symIdx. The symbol should contain just the bytes of a method name.
   432  func (d *deadcodePass) decodeGenericIfaceMethod(ldr *loader.Loader, symIdx loader.Sym) string {
   433  	return string(ldr.Data(symIdx))
   434  }
   435  
   436  func (d *deadcodePass) decodetypeMethods(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs) []methodsig {
   437  	p := ldr.Data(symIdx)
   438  	if !decodetypeHasUncommon(arch, p) {
   439  		panic(fmt.Sprintf("no methods on %q", ldr.SymName(symIdx)))
   440  	}
   441  	off := commonsize(arch) // reflect.rtype
   442  	switch decodetypeKind(arch, p) & kindMask {
   443  	case kindStruct: // reflect.structType
   444  		off += 4 * arch.PtrSize
   445  	case kindPtr: // reflect.ptrType
   446  		off += arch.PtrSize
   447  	case kindFunc: // reflect.funcType
   448  		off += arch.PtrSize // 4 bytes, pointer aligned
   449  	case kindSlice: // reflect.sliceType
   450  		off += arch.PtrSize
   451  	case kindArray: // reflect.arrayType
   452  		off += 3 * arch.PtrSize
   453  	case kindChan: // reflect.chanType
   454  		off += 2 * arch.PtrSize
   455  	case kindMap: // reflect.mapType
   456  		off += 4*arch.PtrSize + 8
   457  	case kindInterface: // reflect.interfaceType
   458  		off += 3 * arch.PtrSize
   459  	default:
   460  		// just Sizeof(rtype)
   461  	}
   462  
   463  	mcount := int(decodeInuxi(arch, p[off+4:], 2))
   464  	moff := int(decodeInuxi(arch, p[off+4+2+2:], 4))
   465  	off += moff                // offset to array of reflect.method values
   466  	const sizeofMethod = 4 * 4 // sizeof reflect.method in program
   467  	return d.decodeMethodSig(ldr, arch, symIdx, relocs, off, sizeofMethod, mcount)
   468  }
   469  

View as plain text