Source file src/cmd/internal/obj/x86/obj6.go

     1  // Inferno utils/6l/pass.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/src"
    37  	"cmd/internal/sys"
    38  	"log"
    39  	"math"
    40  	"path"
    41  	"strings"
    42  )
    43  
    44  func CanUse1InsnTLS(ctxt *obj.Link) bool {
    45  	if isAndroid {
    46  		// Android uses a global variable for the tls offset.
    47  		return false
    48  	}
    49  
    50  	if ctxt.Arch.Family == sys.I386 {
    51  		switch ctxt.Headtype {
    52  		case objabi.Hlinux,
    53  			objabi.Hplan9,
    54  			objabi.Hwindows:
    55  			return false
    56  		}
    57  
    58  		return true
    59  	}
    60  
    61  	switch ctxt.Headtype {
    62  	case objabi.Hplan9, objabi.Hwindows:
    63  		return false
    64  	case objabi.Hlinux, objabi.Hfreebsd:
    65  		return !ctxt.Flag_shared
    66  	}
    67  
    68  	return true
    69  }
    70  
        // progedit normalizes the single instruction p in place; where one
        // instruction must become several, the extra Progs are allocated with
        // newprog and linked in after p. In order, it:
        //   - converts TLS references between the 1- and 2-instruction forms,
        //   - on Android, turns MOVQ/MOVL TLS, Rx into a load of runtime.tls_g,
        //   - retypes the bare 0 operand of CMPPD/CMPPS/CMPSD/CMPSS as the constant $0,
        //   - marks CALL/JMP/RET to a symbol as TYPE_BRANCH,
        //   - turns MOVL/MOVQ of an address into LEAL/LEAQ,
        //   - replaces float constants by XORPS or by a reference to a constant symbol,
        //   - applies the GOT rewrite under -dynlink and the PC-relative rewrite
        //     under -shared on 386.
    71  func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
    72  	// Thread-local storage references use the TLS pseudo-register.
    73  	// As a register, TLS refers to the thread-local storage base, and it
    74  	// can only be loaded into another register:
    75  	//
    76  	//         MOVQ TLS, AX
    77  	//
    78  	// An offset from the thread-local storage base is written off(reg)(TLS*1).
    79  	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
    80  	// indexing from the loaded TLS base. This emits a relocation so that
    81  	// if the linker needs to adjust the offset, it can. For example:
    82  	//
    83  	//         MOVQ TLS, AX
    84  	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
    85  	//
    86  	// On systems that support direct access to the TLS memory, this
    87  	// pair of instructions can be reduced to a direct TLS memory reference:
    88  	//
    89  	//         MOVQ 0(TLS), CX // load g into CX
    90  	//
    91  	// The 2-instruction and 1-instruction forms correspond to the two code
    92  	// sequences for loading a TLS variable in the local exec model given in "ELF
    93  	// Handling For Thread-Local Storage".
    94  	//
    95  	// We apply this rewrite on systems that support the 1-instruction form.
    96  	// The decision is made using only the operating system and the -shared flag,
    97  	// not the link mode. If some link modes on a particular operating system
    98  	// require the 2-instruction form, then all builds for that operating system
    99  	// will use the 2-instruction form, so that the link mode decision can be
   100  	// delayed to link time.
   101  	//
   102  	// In this way, all supported systems use identical instructions to
   103  	// access TLS, and they are rewritten appropriately first here in
   104  	// liblink and then finally using relocations in the linker.
   105  	//
   106  	// When -shared is passed, we leave the code in the 2-instruction form but
   107  	// assemble (and relocate) them in different ways to generate the initial
   108  	// exec code sequence. It's a bit of a fluke that this is possible without
   109  	// rewriting the instructions more comprehensively, and it only works because
   110  	// we only support a single TLS variable (g).
   111  
   112  	if CanUse1InsnTLS(ctxt) {
   113  		// Reduce 2-instruction sequence to 1-instruction sequence.
   114  		// Sequences like
   115  		//	MOVQ TLS, BX
   116  		//	... off(BX)(TLS*1) ...
   117  		// become
   118  		//	NOP
   119  		//	... off(TLS) ...
   120  		//
   121  		// TODO(rsc): Remove the Hsolaris special case. It exists only to
   122  		// guarantee we are producing byte-identical binaries as before this code.
   123  		// But it should be unnecessary.
   124  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
   125  			obj.Nopout(p)
   126  		}
        		// A source operand off(reg)(TLS*1) becomes off(TLS).
   127  		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
   128  			p.From.Reg = REG_TLS
   129  			p.From.Scale = 0
   130  			p.From.Index = REG_NONE
   131  		}
   132  
        		// The same rewrite for a destination operand.
   133  		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   134  			p.To.Reg = REG_TLS
   135  			p.To.Scale = 0
   136  			p.To.Index = REG_NONE
   137  		}
   138  	} else {
   139  		// load_g, below, always inserts the 1-instruction sequence. Rewrite it
   140  		// as the 2-instruction sequence if necessary.
   141  		//	MOVQ 0(TLS), BX
   142  		// becomes
   143  		//	MOVQ TLS, BX
   144  		//	MOVQ 0(BX)(TLS*1), BX
   145  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
        			// q is the second instruction (the indexed load); p itself
        			// is then turned into the load of the TLS base.
   146  			q := obj.Appendp(p, newprog)
   147  			q.As = p.As
   148  			q.From = p.From
   149  			q.From.Type = obj.TYPE_MEM
   150  			q.From.Reg = p.To.Reg
   151  			q.From.Index = REG_TLS
   152  			q.From.Scale = 2 // TODO: use 1
   153  			q.To = p.To
   154  			p.From.Type = obj.TYPE_REG
   155  			p.From.Reg = REG_TLS
   156  			p.From.Index = REG_NONE
   157  			p.From.Offset = 0
   158  		}
   159  	}
   160  
   161  	// Android uses a tls offset determined at runtime. Rewrite
   162  	//	MOVQ TLS, BX
   163  	// to
   164  	//	MOVQ runtime.tls_g(SB), BX
   165  	if isAndroid && (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   166  		p.From.Type = obj.TYPE_MEM
   167  		p.From.Name = obj.NAME_EXTERN
   168  		p.From.Reg = REG_NONE
   169  		p.From.Sym = ctxt.Lookup("runtime.tls_g")
   170  		p.From.Index = REG_NONE
   171  	}
   172  
   173  	// TODO: Remove.
        	// On windows/amd64 and on plan9, a TLS index with scale 1 is changed to scale 2.
   174  	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
   175  		if p.From.Scale == 1 && p.From.Index == REG_TLS {
   176  			p.From.Scale = 2
   177  		}
   178  		if p.To.Scale == 1 && p.To.Index == REG_TLS {
   179  			p.To.Scale = 2
   180  		}
   181  	}
   182  
   183  	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
   184  	// That's what the tables expect.
   185  	switch p.As {
   186  	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
   187  		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
   188  			p.To.Type = obj.TYPE_CONST
   189  		}
   190  	}
   191  
   192  	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
   193  	switch p.As {
   194  	case obj.ACALL, obj.AJMP, obj.ARET:
   195  		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
   196  			p.To.Type = obj.TYPE_BRANCH
   197  		}
   198  	}
   199  
   200  	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
        	// On AMD64 this applies to every address operand; on other families the
        	// address of a NAME_EXTERN or NAME_STATIC symbol is left as a MOV.
   201  	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
   202  		switch p.As {
   203  		case AMOVL:
   204  			p.As = ALEAL
   205  			p.From.Type = obj.TYPE_MEM
   206  		case AMOVQ:
   207  			p.As = ALEAQ
   208  			p.From.Type = obj.TYPE_MEM
   209  		}
   210  	}
   211  
   212  	// Rewrite float constants to values stored in memory.
   213  	switch p.As {
   214  	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
   215  	case AMOVSS:
   216  		if p.From.Type == obj.TYPE_FCONST {
   217  			//  f == 0 can't be used here due to -0, so use Float64bits
   218  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   219  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   220  					p.As = AXORPS
   221  					p.From = p.To
        					// Leave the switch: the memory rewrite below is not needed.
   222  					break
   223  				}
   224  			}
   225  		}
   226  		fallthrough
   227  
   228  	case AFMOVF,
   229  		AFADDF,
   230  		AFSUBF,
   231  		AFSUBRF,
   232  		AFMULF,
   233  		AFDIVF,
   234  		AFDIVRF,
   235  		AFCOMF,
   236  		AFCOMFP,
   237  		AADDSS,
   238  		ASUBSS,
   239  		AMULSS,
   240  		ADIVSS,
   241  		ACOMISS,
   242  		AUCOMISS:
   243  		if p.From.Type == obj.TYPE_FCONST {
        			// The constant is narrowed to float32 and replaced by a
        			// reference to the symbol returned by ctxt.Float32Sym.
   244  			f32 := float32(p.From.Val.(float64))
   245  			p.From.Type = obj.TYPE_MEM
   246  			p.From.Name = obj.NAME_EXTERN
   247  			p.From.Sym = ctxt.Float32Sym(f32)
   248  			p.From.Offset = 0
   249  		}
   250  
   251  	case AMOVSD:
   252  		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
   253  		if p.From.Type == obj.TYPE_FCONST {
   254  			//  f == 0 can't be used here due to -0, so use Float64bits
   255  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   256  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   257  					p.As = AXORPS
   258  					p.From = p.To
        					// Leave the switch: the memory rewrite below is not needed.
   259  					break
   260  				}
   261  			}
   262  		}
   263  		fallthrough
   264  
   265  	case AFMOVD,
   266  		AFADDD,
   267  		AFSUBD,
   268  		AFSUBRD,
   269  		AFMULD,
   270  		AFDIVD,
   271  		AFDIVRD,
   272  		AFCOMD,
   273  		AFCOMDP,
   274  		AADDSD,
   275  		ASUBSD,
   276  		AMULSD,
   277  		ADIVSD,
   278  		ACOMISD,
   279  		AUCOMISD:
   280  		if p.From.Type == obj.TYPE_FCONST {
        			// Same as above, using the float64 symbol from ctxt.Float64Sym.
   281  			f64 := p.From.Val.(float64)
   282  			p.From.Type = obj.TYPE_MEM
   283  			p.From.Name = obj.NAME_EXTERN
   284  			p.From.Sym = ctxt.Float64Sym(f64)
   285  			p.From.Offset = 0
   286  		}
   287  	}
   288  
        	// These two rewrites run last, on the operands as rewritten above.
   289  	if ctxt.Flag_dynlink {
   290  		rewriteToUseGot(ctxt, p, newprog)
   291  	}
   292  
   293  	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
   294  		rewriteToPcrel(ctxt, p, newprog)
   295  	}
   296  }
   297  
   298  // Rewrite p, if necessary, to access global data via the global offset table.
        //
        // The scratch register is R15 on AMD64 and CX otherwise, except that a LEAL
        // whose destination differs from its source base and index registers uses
        // the destination itself. Replacement instructions are allocated with
        // newprog and linked in after p; when p is replaced outright it is turned
        // into a NOP with obj.Nopout.
   299  func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   300  	var lea, mov obj.As
   301  	var reg int16
   302  	if ctxt.Arch.Family == sys.AMD64 {
   303  		lea = ALEAQ
   304  		mov = AMOVQ
   305  		reg = REG_R15
   306  	} else {
   307  		lea = ALEAL
   308  		mov = AMOVL
   309  		reg = REG_CX
   310  		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   311  			// Special case: clobber the destination register with
   312  			// the PC so we don't have to clobber CX.
   313  			// The SSA backend depends on CX not being clobbered across LEAL.
   314  			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
   315  			reg = p.To.Reg
   316  		}
   317  	}
   318  
   319  	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
   320  		//     ADUFFxxx $offset
   321  		// becomes
   322  		//     $MOV runtime.duffxxx@GOT, $reg
   323  		//     $LEA $offset($reg), $reg
   324  		//     CALL $reg
   325  		// (we use LEAx rather than ADDx because ADDx clobbers
   326  		// flags and duffzero on 386 does not otherwise do so).
   327  		var sym *obj.LSym
   328  		if p.As == obj.ADUFFZERO {
   329  			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
   330  		} else {
   331  			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
   332  		}
        		// Save the offset before p.To is overwritten below.
   333  		offset := p.To.Offset
   334  		p.As = mov
   335  		p.From.Type = obj.TYPE_MEM
   336  		p.From.Name = obj.NAME_GOTREF
   337  		p.From.Sym = sym
   338  		p.To.Type = obj.TYPE_REG
   339  		p.To.Reg = reg
   340  		p.To.Offset = 0
   341  		p.To.Sym = nil
   342  		p1 := obj.Appendp(p, newprog)
   343  		p1.As = lea
   344  		p1.From.Type = obj.TYPE_MEM
   345  		p1.From.Offset = offset
   346  		p1.From.Reg = reg
   347  		p1.To.Type = obj.TYPE_REG
   348  		p1.To.Reg = reg
   349  		p2 := obj.Appendp(p1, newprog)
   350  		p2.As = obj.ACALL
   351  		p2.To.Type = obj.TYPE_REG
   352  		p2.To.Reg = reg
   353  	}
   354  
   355  	// We only care about global data: NAME_EXTERN means a global
   356  	// symbol in the Go sense, and p.Sym.Local is true for a few
   357  	// internally defined symbols.
   358  	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   359  		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
   360  		p.As = mov
   361  		p.From.Type = obj.TYPE_ADDR
   362  	}
   363  	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   364  		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
   365  		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
   366  		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
   367  		cmplxdest := false
   368  		pAs := p.As
   369  		var dest obj.Addr
   370  		if p.To.Type != obj.TYPE_REG || pAs != mov {
   371  			if ctxt.Arch.Family == sys.AMD64 {
   372  				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
   373  			}
        			// Remember the original opcode (pAs) and destination so that
        			// they can be re-issued from reg once the address is loaded.
   374  			cmplxdest = true
   375  			dest = p.To
   376  			p.As = mov
   377  			p.To.Type = obj.TYPE_REG
   378  			p.To.Reg = reg
   379  			p.To.Sym = nil
   380  			p.To.Name = obj.NAME_NONE
   381  		}
   382  		p.From.Type = obj.TYPE_MEM
   383  		p.From.Name = obj.NAME_GOTREF
        		// q is the last instruction emitted so far; later ones are appended after it.
   384  		q := p
   385  		if p.From.Offset != 0 {
   386  			q = obj.Appendp(p, newprog)
   387  			q.As = lea
   388  			q.From.Type = obj.TYPE_MEM
   389  			q.From.Reg = p.To.Reg
   390  			q.From.Offset = p.From.Offset
   391  			q.To = p.To
   392  			p.From.Offset = 0
   393  		}
   394  		if cmplxdest {
   395  			q = obj.Appendp(q, newprog)
   396  			q.As = pAs
   397  			q.To = dest
   398  			q.From.Type = obj.TYPE_REG
   399  			q.From.Reg = reg
   400  		}
   401  	}
   402  	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
   403  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   404  	}
        	// source is the operand of p that names a non-local NAME_EXTERN symbol.
   405  	var source *obj.Addr
   406  	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
   407  	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
   408  	// An addition may be inserted between the two MOVs if there is an offset.
   409  	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   410  		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   411  			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
   412  		}
   413  		source = &p.From
   414  	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   415  		source = &p.To
   416  	} else {
        		// No operand refers to global data: nothing more to do.
   417  		return
   418  	}
   419  	if p.As == obj.ACALL {
   420  		// When dynlinking on 386, almost any call might end up being a call
   421  		// to a PLT, so make sure the GOT pointer is loaded into BX.
   422  		// RegTo2 is set on the replacement call insn to stop it being
   423  		// processed when it is in turn passed to progedit.
   424  		//
   425  		// We disable open-coded defers in buildssa() on 386 ONLY with shared
   426  		// libraries because of this extra code added before deferreturn calls.
   427  		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
   428  			return
   429  		}
        		// p1 loads the GOT address into BX; p2 is a copy of the call.
   430  		p1 := obj.Appendp(p, newprog)
   431  		p2 := obj.Appendp(p1, newprog)
   432  
   433  		p1.As = ALEAL
   434  		p1.From.Type = obj.TYPE_MEM
   435  		p1.From.Name = obj.NAME_STATIC
   436  		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
   437  		p1.To.Type = obj.TYPE_REG
   438  		p1.To.Reg = REG_BX
   439  
   440  		p2.As = p.As
   441  		p2.Scond = p.Scond
   442  		p2.From = p.From
   443  		if p.RestArgs != nil {
   444  			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
   445  		}
   446  		p2.Reg = p.Reg
   447  		p2.To = p.To
   448  		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
   449  		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
   450  		// itself gets passed to progedit.
   451  		p2.To.Type = obj.TYPE_MEM
   452  		p2.RegTo2 = 1
   453  
   454  		obj.Nopout(p)
   455  		return
   456  
   457  	}
        	// These instructions keep their symbol operand unchanged.
   458  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
   459  		return
   460  	}
        	// The diagnostic is reported, but the rewrite below still goes ahead.
   461  	if source.Type != obj.TYPE_MEM {
   462  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   463  	}
        	// p1 loads the symbol's GOT entry into reg; p2 is a copy of p that
        	// goes through reg instead of naming the symbol.
   464  	p1 := obj.Appendp(p, newprog)
   465  	p2 := obj.Appendp(p1, newprog)
   466  
   467  	p1.As = mov
   468  	p1.From.Type = obj.TYPE_MEM
   469  	p1.From.Sym = source.Sym
   470  	p1.From.Name = obj.NAME_GOTREF
   471  	p1.To.Type = obj.TYPE_REG
   472  	p1.To.Reg = reg
   473  
   474  	p2.As = p.As
   475  	p2.From = p.From
   476  	p2.To = p.To
        	// NOTE(review): the operand is re-selected by name here rather than via
        	// source. If From named a local NAME_EXTERN symbol and To a non-local
        	// one, From would be rewritten although p1 loaded To's symbol; confirm
        	// that this combination cannot occur.
   477  	if p.From.Name == obj.NAME_EXTERN {
   478  		p2.From.Reg = reg
   479  		p2.From.Name = obj.NAME_NONE
   480  		p2.From.Sym = nil
   481  	} else if p.To.Name == obj.NAME_EXTERN {
   482  		p2.To.Reg = reg
   483  		p2.To.Name = obj.NAME_NONE
   484  		p2.To.Sym = nil
   485  	} else {
        		// Presumably unreachable: source was chosen above only when one
        		// of the two operands has Name == NAME_EXTERN.
   486  		return
   487  	}
   488  	obj.Nopout(p)
   489  }
   490  
   491  // rewriteToPcrel rewrites p, when an operand names a symbol with no base register,
   492  // into a get_pc_thunk call followed by a copy of p that uses the thunk's register as base.
   493  func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   494  	// Progs inserted here carry a nonzero RegTo2 so they are not processed twice.
   495  	if p.RegTo2 != 0 {
   496  		return
   497  	}
   498  	switch p.As {
   499  	case obj.ATEXT, obj.AFUNCDATA, obj.ACALL, obj.ARET, obj.AJMP:
   500  		return
   501  	}
   502  	// isName reports whether a names a symbol (NAME_EXTERN, NAME_STATIC or
   503  	// NAME_GOTREF) with no base register. Any Prog, aside from the special cases
   504  	// above, with such an Addr has a CALL __x86.get_pc_thunk.XX inserted before it.
   505  	isName := func(a *obj.Addr) bool {
   506  		switch {
   507  		case a.Sym == nil, a.Reg != 0, a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR:
   508  			return false
   509  		case a.Sym.Type == objabi.STLSBSS:
   510  			return false
   511  		}
   512  		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
   513  	}
   514  
   515  	// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting to
   516  	// "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX" respectively.
   517  	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR && p.To.Type != obj.TYPE_REG {
   518  		use := obj.Appendp(p, newprog)
   519  		use.As = p.As
   520  		use.To = p.To
   521  		use.From.Type = obj.TYPE_REG
   522  		use.From.Reg = REG_CX
   523  		p.As = AMOVL
   524  		p.To.Type = obj.TYPE_REG
   525  		p.To.Reg = REG_CX
   526  		p.To.Name = obj.NAME_NONE
   527  		p.To.Sym = nil
   528  	}
   529  
   530  	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
   531  		return
   532  	}
   533  
   534  	// Why the destination register? See the comment near the top of
   535  	// rewriteToUseGot above. AMOVLs might be introduced by the GOT rewrites.
   536  	var dst int16 = REG_CX
   537  	switch p.As {
   538  	case ALEAL, AMOVL:
   539  		if p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   540  			dst = p.To.Reg
   541  		}
   542  	}
   543  
   544  	thunk := obj.Appendp(p, newprog)
   545  	thunk.RegTo2 = 1
   546  	repl := obj.Appendp(thunk, newprog)
   547  	repl.RegTo2 = 1
   548  	thunk.As = obj.ACALL
   549  	thunk.To.Type = obj.TYPE_MEM
   550  	thunk.To.Name = obj.NAME_EXTERN
   551  	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
   552  	thunk.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
   553  	repl.As, repl.Scond, repl.Reg = p.As, p.Scond, p.Reg
   554  	repl.From, repl.To, repl.RestArgs = p.From, p.To, p.RestArgs
   555  	if isName(&p.From) {
   556  		repl.From.Reg = dst
   557  	}
   558  	if isName(&p.To) {
   559  		repl.To.Reg = dst
   560  	}
   561  	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
   562  		repl.GetFrom3().Reg = dst
   563  	}
   564  	obj.Nopout(p)
   565  }
   566  
   567  // Bit flags kept in Prog.mark.
   568  const (
   569  	markBit = 1 << 0 // used in errorCheck to avoid duplicate work
   570  )
   571  
        // preprocess lays out the stack frame of the function cursym and inserts
        // its prologue and epilogue, allocating new instructions with newprog.
        // It does nothing if the function has no instruction after TEXT.
        // Otherwise it:
        //   - reserves a slot for the caller's BP on AMD64 (see the conditions below),
        //   - records the argument and local sizes in cursym.Func(),
        //   - on AMD64, marks functions that pass the leaf search below as NOSPLIT,
        //   - emits the stack-split check (or only the G load for NOSPLIT wrappers),
        //     the SP adjustment, the BP save and, for wrappers, the panic argp fixup,
        //   - walks every Prog to rebase NAME_AUTO/NAME_PARAM offsets, record Spadj
        //     for pushes, pops and ADJSP, and expand each RET into BP restore,
        //     SP adjustment and RET (or JMP when the RET carries a symbol).
   572  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   573  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   574  		return
   575  	}
   576  
        	// The frame size is read from the TEXT instruction's To.Offset;
        	// a negative value is treated as 0.
   577  	p := cursym.Func().Text
   578  	autoffset := int32(p.To.Offset)
   579  	if autoffset < 0 {
   580  		autoffset = 0
   581  	}
   582  
        	// hasCall is true if the function contains a CALL, DUFFCOPY or DUFFZERO.
   583  	hasCall := false
   584  	for q := p; q != nil; q = q.Link {
   585  		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
   586  			hasCall = true
   587  			break
   588  		}
   589  	}
   590  
   591  	var bpsize int
   592  	if ctxt.Arch.Family == sys.AMD64 &&
   593  		!p.From.Sym.NoFrame() && // (1) below
   594  		!(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
   595  		!(autoffset == 0 && !hasCall) { // (3) below
   596  		// Make room to save a base pointer.
   597  		// There are 2 cases we must avoid:
   598  		// 1) If noframe is set (which we do for functions which tail call).
   599  		// 2) Scary runtime internals which would be all messed up by frame pointers.
   600  		//    We detect these using a heuristic: frameless nosplit functions.
   601  		//    TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
   602  		// For performance, we also want to avoid:
   603  		// 3) Frameless leaf functions
   604  		bpsize = ctxt.Arch.PtrSize
   605  		autoffset += int32(bpsize)
   606  		p.To.Offset += int64(bpsize)
   607  	} else {
   608  		bpsize = 0
   609  	}
   610  
        	// The argument size is carried in the TEXT instruction's To.Val.
   611  	textarg := int64(p.To.Val.(int32))
   612  	cursym.Func().Args = int32(textarg)
   613  	cursym.Func().Locals = int32(p.To.Offset)
   614  
   615  	// TODO(rsc): Remove.
   616  	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
   617  		cursym.Func().Locals = 0
   618  	}
   619  
   620  	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
        	// A function with a frame below StackSmall is marked NOSPLIT when the
        	// search finds no call other than zero-argument runtime calls and
        	// DUFFCOPY/DUFFZERO, and those only if the frame is below StackSmall-8.
   621  	if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() {
   622  		leaf := true
   623  	LeafSearch:
   624  		for q := p; q != nil; q = q.Link {
   625  			switch q.As {
   626  			case obj.ACALL:
   627  				// Treat common runtime calls that take no arguments
   628  				// the same as duffcopy and duffzero.
   629  				if !isZeroArgRuntimeCall(q.To.Sym) {
   630  					leaf = false
   631  					break LeafSearch
   632  				}
   633  				fallthrough
   634  			case obj.ADUFFCOPY, obj.ADUFFZERO:
   635  				if autoffset >= objabi.StackSmall-8 {
   636  					leaf = false
   637  					break LeafSearch
   638  				}
   639  			}
   640  		}
   641  
   642  		if leaf {
   643  			p.From.Sym.Set(obj.AttrNoSplit, true)
   644  		}
   645  	}
   646  
        	// Scratch registers used by the wrapper check emitted further down.
   647  	var regEntryTmp0, regEntryTmp1 int16
   648  	if ctxt.Arch.Family == sys.AMD64 {
   649  		regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1
   650  	} else {
   651  		regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI
   652  	}
   653  
        	// regg is the register that holds g after the code emitted here; it
        	// stays zero when neither branch below runs.
   654  	var regg int16
   655  	if !p.From.Sym.NoSplit() {
   656  		// Emit split check and load G register
   657  		p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
   658  	} else if p.From.Sym.Wrapper() {
   659  		// Load G register for the wrapper code
   660  		p, regg = loadG(ctxt, cursym, p, newprog)
   661  	}
   662  
   663  	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
   664  	// TODO: are there other cases (e.g., wrapper functions) that need marking?
   665  	markedPrologue := false
   666  
   667  	if autoffset != 0 {
   668  		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
   669  			ctxt.Diag("unaligned stack size %d", autoffset)
   670  		}
        		// ADJSP $autoffset allocates the frame.
   671  		p = obj.Appendp(p, newprog)
   672  		p.As = AADJSP
   673  		p.From.Type = obj.TYPE_CONST
   674  		p.From.Offset = int64(autoffset)
   675  		p.Spadj = autoffset
   676  		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   677  		markedPrologue = true
   678  	}
   679  
   680  	if bpsize > 0 {
   681  		// Save caller's BP
   682  		p = obj.Appendp(p, newprog)
   683  
   684  		p.As = AMOVQ
   685  		p.From.Type = obj.TYPE_REG
   686  		p.From.Reg = REG_BP
   687  		p.To.Type = obj.TYPE_MEM
   688  		p.To.Reg = REG_SP
   689  		p.To.Scale = 1
   690  		p.To.Offset = int64(autoffset) - int64(bpsize)
   691  		if !markedPrologue {
   692  			p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   693  		}
   694  
   695  		// Move current frame to BP
   696  		p = obj.Appendp(p, newprog)
   697  
   698  		p.As = ALEAQ
   699  		p.From.Type = obj.TYPE_MEM
   700  		p.From.Reg = REG_SP
   701  		p.From.Scale = 1
   702  		p.From.Offset = int64(autoffset) - int64(bpsize)
   703  		p.To.Type = obj.TYPE_REG
   704  		p.To.Reg = REG_BP
   705  	}
   706  
   707  	if cursym.Func().Text.From.Sym.Wrapper() {
   708  		// if g._panic != nil && g._panic.argp == FP {
   709  		//   g._panic.argp = bottom-of-frame
   710  		// }
   711  		//
   712  		//	MOVQ g_panic(g), regEntryTmp0
   713  		//	TESTQ regEntryTmp0, regEntryTmp0
   714  		//	JNE checkargp
   715  		// end:
   716  		//	NOP
   717  		//  ... rest of function ...
   718  		// checkargp:
   719  		//	LEAQ (autoffset+8)(SP), regEntryTmp1
   720  		//	CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   721  		//	JNE end
   722  		//  MOVQ SP, panic_argp(regEntryTmp0)
   723  		//  JMP end
   724  		//
   725  		// The NOP is needed to give the jumps somewhere to land.
   726  		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
   727  		//
   728  		// The layout is chosen to help static branch prediction:
   729  		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.
        		//
        		// On 386 each Q-suffixed opcode below is replaced by its L-suffixed form.
   730  
   731  		// MOVQ g_panic(g), regEntryTmp0
   732  		p = obj.Appendp(p, newprog)
   733  		p.As = AMOVQ
   734  		p.From.Type = obj.TYPE_MEM
   735  		p.From.Reg = regg
   736  		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
   737  		p.To.Type = obj.TYPE_REG
   738  		p.To.Reg = regEntryTmp0
   739  		if ctxt.Arch.Family == sys.I386 {
   740  			p.As = AMOVL
   741  		}
   742  
   743  		// TESTQ regEntryTmp0, regEntryTmp0
   744  		p = obj.Appendp(p, newprog)
   745  		p.As = ATESTQ
   746  		p.From.Type = obj.TYPE_REG
   747  		p.From.Reg = regEntryTmp0
   748  		p.To.Type = obj.TYPE_REG
   749  		p.To.Reg = regEntryTmp0
   750  		if ctxt.Arch.Family == sys.I386 {
   751  			p.As = ATESTL
   752  		}
   753  
   754  		// JNE checkargp (checkargp to be resolved later)
   755  		jne := obj.Appendp(p, newprog)
   756  		jne.As = AJNE
   757  		jne.To.Type = obj.TYPE_BRANCH
   758  
   759  		// end:
   760  		//  NOP
   761  		end := obj.Appendp(jne, newprog)
   762  		end.As = obj.ANOP
   763  
   764  		// Fast forward to end of function.
   765  		var last *obj.Prog
   766  		for last = end; last.Link != nil; last = last.Link {
   767  		}
   768  
   769  		// LEAQ (autoffset+8)(SP), regEntryTmp1
   770  		p = obj.Appendp(last, newprog)
   771  		p.As = ALEAQ
   772  		p.From.Type = obj.TYPE_MEM
   773  		p.From.Reg = REG_SP
   774  		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
   775  		p.To.Type = obj.TYPE_REG
   776  		p.To.Reg = regEntryTmp1
   777  		if ctxt.Arch.Family == sys.I386 {
   778  			p.As = ALEAL
   779  		}
   780  
   781  		// Set jne branch target.
   782  		jne.To.SetTarget(p)
   783  
   784  		// CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   785  		p = obj.Appendp(p, newprog)
   786  		p.As = ACMPQ
   787  		p.From.Type = obj.TYPE_MEM
   788  		p.From.Reg = regEntryTmp0
   789  		p.From.Offset = 0 // Panic.argp
   790  		p.To.Type = obj.TYPE_REG
   791  		p.To.Reg = regEntryTmp1
   792  		if ctxt.Arch.Family == sys.I386 {
   793  			p.As = ACMPL
   794  		}
   795  
   796  		// JNE end
   797  		p = obj.Appendp(p, newprog)
   798  		p.As = AJNE
   799  		p.To.Type = obj.TYPE_BRANCH
   800  		p.To.SetTarget(end)
   801  
   802  		// MOVQ SP, panic_argp(regEntryTmp0)
   803  		p = obj.Appendp(p, newprog)
   804  		p.As = AMOVQ
   805  		p.From.Type = obj.TYPE_REG
   806  		p.From.Reg = REG_SP
   807  		p.To.Type = obj.TYPE_MEM
   808  		p.To.Reg = regEntryTmp0
   809  		p.To.Offset = 0 // Panic.argp
   810  		if ctxt.Arch.Family == sys.I386 {
   811  			p.As = AMOVL
   812  		}
   813  
   814  		// JMP end
   815  		p = obj.Appendp(p, newprog)
   816  		p.As = obj.AJMP
   817  		p.To.Type = obj.TYPE_BRANCH
   818  		p.To.SetTarget(end)
   819  
   820  		// Reset p for following code.
   821  		p = end
   822  	}
   823  
        	// deltasp is the running sum of the SP adjustments (ADJSP, pushes and
        	// pops) seen so far in the walk over the whole function below.
   824  	var deltasp int32
   825  	for p = cursym.Func().Text; p != nil; p = p.Link {
   826  		pcsize := ctxt.Arch.RegSize
        		// Rebase frame-relative operands: autos by deltasp-bpsize,
        		// params by deltasp+pcsize.
   827  		switch p.From.Name {
   828  		case obj.NAME_AUTO:
   829  			p.From.Offset += int64(deltasp) - int64(bpsize)
   830  		case obj.NAME_PARAM:
   831  			p.From.Offset += int64(deltasp) + int64(pcsize)
   832  		}
   833  		if p.GetFrom3() != nil {
   834  			switch p.GetFrom3().Name {
   835  			case obj.NAME_AUTO:
   836  				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
   837  			case obj.NAME_PARAM:
   838  				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
   839  			}
   840  		}
   841  		switch p.To.Name {
   842  		case obj.NAME_AUTO:
   843  			p.To.Offset += int64(deltasp) - int64(bpsize)
   844  		case obj.NAME_PARAM:
   845  			p.To.Offset += int64(deltasp) + int64(pcsize)
   846  		}
   847  
   848  		switch p.As {
   849  		default:
        			// Any other instruction whose destination is the SP register
        			// (CMPL and CMPQ excepted) sets FuncFlag_SPWRITE on the
        			// function; outside assembly input that is a fatal error.
   850  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
   851  				f := cursym.Func()
   852  				if f.FuncFlag&objabi.FuncFlag_SPWRITE == 0 {
   853  					f.FuncFlag |= objabi.FuncFlag_SPWRITE
   854  					if ctxt.Debugvlog || !ctxt.IsAsm {
   855  						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   856  						if !ctxt.IsAsm {
   857  							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   858  							ctxt.DiagFlush()
   859  							log.Fatalf("bad SPWRITE")
   860  						}
   861  					}
   862  				}
   863  			}
   864  			continue
   865  
   866  		case APUSHL, APUSHFL:
   867  			deltasp += 4
   868  			p.Spadj = 4
   869  			continue
   870  
   871  		case APUSHQ, APUSHFQ:
   872  			deltasp += 8
   873  			p.Spadj = 8
   874  			continue
   875  
   876  		case APUSHW, APUSHFW:
   877  			deltasp += 2
   878  			p.Spadj = 2
   879  			continue
   880  
   881  		case APOPL, APOPFL:
   882  			deltasp -= 4
   883  			p.Spadj = -4
   884  			continue
   885  
   886  		case APOPQ, APOPFQ:
   887  			deltasp -= 8
   888  			p.Spadj = -8
   889  			continue
   890  
   891  		case APOPW, APOPFW:
   892  			deltasp -= 2
   893  			p.Spadj = -2
   894  			continue
   895  
   896  		case AADJSP:
   897  			p.Spadj = int32(p.From.Offset)
   898  			deltasp += int32(p.From.Offset)
   899  			continue
   900  
   901  		case obj.ARET:
   902  			// do nothing
   903  		}
   904  
        		// Only RET reaches this point; every other case above continues.
   905  		if autoffset != deltasp {
   906  			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
   907  		}
   908  
   909  		if autoffset != 0 {
        			// The RET Prog itself is reused as the first epilogue
        			// instruction and a new RET is appended at the end.
   910  			to := p.To // Keep To attached to RET for retjmp below
   911  			p.To = obj.Addr{}
   912  			if bpsize > 0 {
   913  				// Restore caller's BP
   914  				p.As = AMOVQ
   915  
   916  				p.From.Type = obj.TYPE_MEM
   917  				p.From.Reg = REG_SP
   918  				p.From.Scale = 1
   919  				p.From.Offset = int64(autoffset) - int64(bpsize)
   920  				p.To.Type = obj.TYPE_REG
   921  				p.To.Reg = REG_BP
   922  				p = obj.Appendp(p, newprog)
   923  			}
   924  
   925  			p.As = AADJSP
   926  			p.From.Type = obj.TYPE_CONST
   927  			p.From.Offset = int64(-autoffset)
   928  			p.Spadj = -autoffset
   929  			p = obj.Appendp(p, newprog)
   930  			p.As = obj.ARET
   931  			p.To = to
   932  
   933  			// If there are instructions following
   934  			// this ARET, they come from a branch
   935  			// with the same stackframe, so undo
   936  			// the cleanup.
   937  			p.Spadj = +autoffset
   938  		}
   939  
   940  		if p.To.Sym != nil { // retjmp
   941  			p.As = obj.AJMP
   942  		}
   943  	}
   944  }
   945  
   946  func isZeroArgRuntimeCall(s *obj.LSym) bool {
   947  	if s == nil {
   948  		return false
   949  	}
   950  	switch s.Name {
   951  	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
   952  		return true
   953  	}
   954  	if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
   955  		// These functions do take arguments (in registers),
   956  		// but use no stack before they do a stack check. We
   957  		// should include them. See issue 31219.
   958  		return true
   959  	}
   960  	return false
   961  }
   962  
   963  func indir_cx(ctxt *obj.Link, a *obj.Addr) {
   964  	a.Type = obj.TYPE_MEM
   965  	a.Reg = REG_CX
   966  }
   967  
   968  // loadG ensures the G is loaded into a register (either CX or REGG),
   969  // appending instructions to p if necessary. It returns the new last
   970  // instruction and the G register.
   971  func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
   972  	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
   973  		// Use the G register directly in ABIInternal
   974  		return p, REGG
   975  	}
   976  
   977  	var regg int16 = REG_CX
   978  	if ctxt.Arch.Family == sys.AMD64 {
   979  		regg = REGG // == REG_R14
   980  	}
   981  
   982  	p = obj.Appendp(p, newprog)
   983  	p.As = AMOVQ
   984  	if ctxt.Arch.PtrSize == 4 {
   985  		p.As = AMOVL
   986  	}
   987  	p.From.Type = obj.TYPE_MEM
   988  	p.From.Reg = REG_TLS
   989  	p.From.Offset = 0
   990  	p.To.Type = obj.TYPE_REG
   991  	p.To.Reg = regg
   992  
   993  	// Rewrite TLS instruction if necessary.
   994  	next := p.Link
   995  	progedit(ctxt, p, newprog)
   996  	for p.Link != next {
   997  		p = p.Link
   998  		progedit(ctxt, p, newprog)
   999  	}
  1000  
  1001  	if p.From.Index == REG_TLS {
  1002  		p.From.Scale = 2
  1003  	}
  1004  
  1005  	return p, regg
  1006  }
  1007  
// stacksplit appends the stack-split check for cursym after p.
// It appends to (does not overwrite) p.
//
// The emitted code is, in order:
//   - when ctxt.Flag_maymorestack is non-empty, a call to that function
//     bracketed by register-argument spill/unspill and, if the function
//     needs a closure context, a push/pop of REGCTXT;
//   - a load of G via loadG;
//   - a comparison against the stack guard stored in G (stackguard0, or
//     stackguard1 for C functions), whose shape depends on how framesize
//     compares with objabi.StackSmall and objabi.StackBig;
//   - a JLS to an out-of-line sequence, appended after the current last
//     instruction of the function, that calls the appropriate
//     runtime.morestack variant and jumps back to the instruction
//     following the maymorestack sequence to repeat the check.
//
// textarg is not referenced in this function body.
//
// It returns the instruction produced by EndUnsafePoint after the JLS
// (the end of the inline check) and the register holding G.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) {
	// Default to the 64-bit opcodes; switched to the 32-bit forms below on 386.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		// Preserve the closure context register across the call.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// (The jump target used below is startPred.Link, i.e. the first
	// instruction appended after this point.)
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is the underflow branch (JCS); it is only set in the largest-frame case.
	var q1 *obj.Prog
	if framesize <= objabi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= objabi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - objabi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > objabi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - objabi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		// The branch target is filled in at the end, once the
		// out-of-line sequence exists.
		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common
	// Conditional branch to the out-of-line morestack call; its target
	// is set at the end of this function.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Walk to the current last instruction of the function.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Pick the morestack variant: morestackc for C functions,
	// morestack_noctxt when no closure context is needed, and plain
	// morestack otherwise.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	pcdata = cursym.Func().UnspillRegisterArgs(callend, newprog)
	pcdata = ctxt.EndUnsafePoint(pcdata, newprog, -1)

	// After morestack returns, go back and repeat the check.
	jmp := obj.Appendp(pcdata, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize // cancels the -framesize recorded on spfix

	// Both the guard comparison and (if present) the underflow check
	// branch to the start of the out-of-line spill+call sequence.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end, rg
}
  1227  
  1228  func isR15(r int16) bool {
  1229  	return r == REG_R15 || r == REG_R15B
  1230  }
  1231  func addrMentionsR15(a *obj.Addr) bool {
  1232  	if a == nil {
  1233  		return false
  1234  	}
  1235  	return isR15(a.Reg) || isR15(a.Index)
  1236  }
  1237  func progMentionsR15(p *obj.Prog) bool {
  1238  	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
  1239  }
  1240  
  1241  // progOverwritesR15 reports whether p writes to R15 and does not depend on
  1242  // the previous value of R15.
  1243  func progOverwritesR15(p *obj.Prog) bool {
  1244  	if !(p.To.Type == obj.TYPE_REG && isR15(p.To.Reg)) {
  1245  		// Not writing to R15.
  1246  		return false
  1247  	}
  1248  	if (p.As == AXORL || p.As == AXORQ) && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1249  		// These look like uses of R15, but aren't, so we must detect these
  1250  		// before the use check below.
  1251  		return true
  1252  	}
  1253  	if addrMentionsR15(&p.From) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3()) {
  1254  		// use before overwrite
  1255  		return false
  1256  	}
  1257  	if p.As == AMOVL || p.As == AMOVQ || p.As == APOPQ {
  1258  		return true
  1259  		// TODO: MOVB might be ok if we only ever use R15B.
  1260  	}
  1261  	return false
  1262  }
  1263  
  1264  func addrUsesGlobal(a *obj.Addr) bool {
  1265  	if a == nil {
  1266  		return false
  1267  	}
  1268  	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
  1269  }
  1270  func progUsesGlobal(p *obj.Prog) bool {
  1271  	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
  1272  		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
  1273  		// or R15 would be dead at them anyway.
  1274  		return false
  1275  	}
  1276  	if p.As == ALEAQ {
  1277  		// The GOT entry is placed directly in the destination register; R15 is not used.
  1278  		return false
  1279  	}
  1280  	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
  1281  }
  1282  
  1283  func errorCheck(ctxt *obj.Link, s *obj.LSym) {
  1284  	// When dynamic linking, R15 is used to access globals. Reject code that
  1285  	// uses R15 after a global variable access.
  1286  	if !ctxt.Flag_dynlink {
  1287  		return
  1288  	}
  1289  
  1290  	// Flood fill all the instructions where R15's value is junk.
  1291  	// If there are any uses of R15 in that set, report an error.
  1292  	var work []*obj.Prog
  1293  	var mentionsR15 bool
  1294  	for p := s.Func().Text; p != nil; p = p.Link {
  1295  		if progUsesGlobal(p) {
  1296  			work = append(work, p)
  1297  			p.Mark |= markBit
  1298  		}
  1299  		if progMentionsR15(p) {
  1300  			mentionsR15 = true
  1301  		}
  1302  	}
  1303  	if mentionsR15 {
  1304  		for len(work) > 0 {
  1305  			p := work[len(work)-1]
  1306  			work = work[:len(work)-1]
  1307  			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
  1308  				q.Mark |= markBit
  1309  				work = append(work, q)
  1310  			}
  1311  			if p.As == obj.AJMP || p.As == obj.ARET {
  1312  				continue // no fallthrough
  1313  			}
  1314  			if progMentionsR15(p) {
  1315  				if progOverwritesR15(p) {
  1316  					// R15 is overwritten by this instruction. Its value is not junk any more.
  1317  					continue
  1318  				}
  1319  				pos := ctxt.PosTable.Pos(p.Pos)
  1320  				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
  1321  				break // only report one error
  1322  			}
  1323  			if q := p.Link; q != nil && q.Mark&markBit == 0 {
  1324  				q.Mark |= markBit
  1325  				work = append(work, q)
  1326  			}
  1327  		}
  1328  	}
  1329  
  1330  	// Clean up.
  1331  	for p := s.Func().Text; p != nil; p = p.Link {
  1332  		p.Mark &^= markBit
  1333  	}
  1334  }
  1335  
// unaryDst is a set of x86 opcodes, keyed by obj.As, that is installed as
// the UnaryDst table of both Linkamd64 and Link386.
// NOTE(review): presumably these are the instructions whose single operand
// is a destination rather than a source — confirm against the UnaryDst
// field of obj.LinkArch.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
  1420  
// Linkamd64 is the obj.LinkArch for sys.ArchAMD64. It wires this package's
// instinit, errorCheck, preprocess, span6 and progedit functions, the
// unaryDst table, and the AMD64 DWARF register mapping into the
// architecture-independent object writer.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
  1431  
// Link386 is the obj.LinkArch for sys.Arch386. It uses the same instinit,
// preprocess, span6 and progedit functions and the same unaryDst table as
// Linkamd64, with the X86 DWARF register mapping. No ErrorCheck hook is
// set for this architecture.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}
  1441  

View as plain text