Source file src/cmd/internal/obj/x86/asm6.go

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"internal/buildcfg"
    40  	"log"
    41  	"strings"
    42  )
    43  
    44  var (
    45  	plan9privates *obj.LSym // NOTE(review): no use is visible in this section; presumably the Plan 9 private-data symbol — confirm at its use site
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // we want to align each loop entry to a loopAlign-byte boundary,
    52  // and are willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  //
    63  const (
    64  	loopAlign  = 16 // alignment boundary, in bytes, for loop entries
    65  	maxLoopPad = 0  // maximum NOP padding in bytes; 0 disables loop alignment
    66  )
    67  
    68  // Bit flags that are used to express jump target properties.
    69  const (
    70  	// branchBackwards marks targets that are located behind.
    71  	// Used to express jumps to loop headers.
    72  	branchBackwards = (1 << iota) // 1
    73  	// branchShort marks branches whose target is close,
    74  	// with an offset in the -128..127 range.
    75  	branchShort // 2
    76  	// branchLoopHead marks loop entry.
    77  	// Used to insert padding for misaligned loops.
    78  	branchLoopHead // 4
    79  )
    80  
    81  // opBytes holds optab encoding bytes.
    82  // Each ytab line reserves a fixed number of bytes in this array
    83  // (the line's zoffset).
    84  // The size should be the minimal number of bytes that
    85  // is enough to hold the biggest optab op lines.
    86  type opBytes [31]uint8
    87  
    88  type Optab struct { // element type of the optab table: the encoding recipe for one instruction
    89  	as     obj.As  // instruction this entry describes
    90  	ytab   []ytab  // accepted operand patterns; doasm looks through them for a matching line
    91  	prefix uint8   // one of the P* constants (Px, Pe, Pw, ...)
    92  	op     opBytes // encoding bytes, walked in step with ytab using each line's zoffset
    93  }
    94  
    95  type movtab struct { // NOTE(review): not used in this section; field notes below are inferred from names — confirm at the use site
    96  	as   obj.As   // instruction
    97  	ft   uint8    // presumably the class of the "from" operand
    98  	f3t  uint8    // presumably the class of a third operand
    99  	tt   uint8    // presumably the class of the "to" operand
   100  	code uint8    // presumably selects how op is interpreted
   101  	op   [4]uint8 // presumably up to four encoding bytes
   102  }
   103  
   104  const ( // operand classes ("Ytypes"): oclass computes one per operand, and ycover records which classes also count as which
   105  	Yxxx = iota
   106  	Ynone
   107  	Yi0 // $0
   108  	Yi1 // $1
   109  	Yu2 // $x, x fits in uint2
   110  	Yi8 // $x, x fits in int8
   111  	Yu8 // $x, x fits in uint8
   112  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   113  	Ys32
   114  	Yi32
   115  	Yi64
   116  	Yiauto
   117  	Yal
   118  	Ycl
   119  	Yax
   120  	Ycx
   121  	Yrb
   122  	Yrl
   123  	Yrl32 // Yrl on 32-bit system
   124  	Yrf
   125  	Yf0
   126  	Yrx
   127  	Ymb
   128  	Yml
   129  	Ym
   130  	Ybr
   131  	Ycs
   132  	Yss
   133  	Yds
   134  	Yes
   135  	Yfs
   136  	Ygs
   137  	Ygdtr
   138  	Yidtr
   139  	Yldtr
   140  	Ymsw
   141  	Ytask
   142  	Ycr0
   143  	Ycr1
   144  	Ycr2
   145  	Ycr3
   146  	Ycr4
   147  	Ycr5
   148  	Ycr6
   149  	Ycr7
   150  	Ycr8
   151  	Ydr0
   152  	Ydr1
   153  	Ydr2
   154  	Ydr3
   155  	Ydr4
   156  	Ydr5
   157  	Ydr6
   158  	Ydr7
   159  	Ytr0
   160  	Ytr1
   161  	Ytr2
   162  	Ytr3
   163  	Ytr4
   164  	Ytr5
   165  	Ytr6
   166  	Ytr7
   167  	Ymr
   168  	Ymm
   169  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   170  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   171  	Yxr           // X0..X15
   172  	YxrEvex       // X0..X31
   173  	Yxm
   174  	YxmEvex       // YxrEvex+Ym
   175  	Yxvm          // VSIB vector array; vm32x/vm64x
   176  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   177  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   178  	Yyr           // Y0..Y15
   179  	YyrEvex       // Y0..Y31
   180  	Yym
   181  	YymEvex   // YyrEvex+Ym
   182  	Yyvm      // VSIB vector array; vm32y/vm64y
   183  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   184  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource Yzr
   185  	Yzr       // Z0..Z31
   186  	Yzm       // Yzr+Ym
   187  	Yzvm      // VSIB vector array; vm32z/vm64z
   188  	Yk0       // K0
   189  	Yknot0    // K1..K7; write mask
   190  	Yk        // K0..K7; used for KOP
   191  	Ykm       // Yk+Ym; used for KOP
   192  	Ytls
   193  	Ytextsize
   194  	Yindir
   195  	Ymax // number of classes; ycover is sized Ymax * Ymax
   196  )
   197  
   198  const ( // encoding cases ("Ztypes"): the first entry of each ytab line; doasm switches on it to lay down bytes
   199  	Zxxx = iota
   200  	Zlit
   201  	Zlitm_r
   202  	Zlitr_m
   203  	Zlit_m_r
   204  	Z_rp
   205  	Zbr
   206  	Zcall
   207  	Zcallcon
   208  	Zcallduff
   209  	Zcallind
   210  	Zcallindreg
   211  	Zib_
   212  	Zib_rp
   213  	Zibo_m
   214  	Zibo_m_xm
   215  	Zil_
   216  	Zil_rp
   217  	Ziq_rp
   218  	Zilo_m
   219  	Zjmp
   220  	Zjmpcon
   221  	Zloop
   222  	Zo_iw
   223  	Zm_o
   224  	Zm_r
   225  	Z_m_r
   226  	Zm2_r
   227  	Zm_r_xm
   228  	Zm_r_i_xm
   229  	Zm_r_xm_nr
   230  	Zr_m_xm_nr
   231  	Zibm_r // mmx1,mmx2/mem64,imm8
   232  	Zibr_m
   233  	Zmb_r
   234  	Zaut_r
   235  	Zo_m
   236  	Zo_m64
   237  	Zpseudo
   238  	Zr_m
   239  	Zr_m_xm
   240  	Zrp_
   241  	Z_ib
   242  	Z_il
   243  	Zm_ibo
   244  	Zm_ilo
   245  	Zib_rr
   246  	Zil_rr
   247  	Zbyte
   248  
   249  	Zvex_rm_v_r
   250  	Zvex_rm_v_ro
   251  	Zvex_r_v_rm
   252  	Zvex_i_rm_vo
   253  	Zvex_v_rm_r
   254  	Zvex_i_rm_r
   255  	Zvex_i_r_v
   256  	Zvex_i_rm_v_r
   257  	Zvex
   258  	Zvex_rm_r_vo
   259  	Zvex_i_r_rm
   260  	Zvex_hr_rm_v_r
   261  
   262  	Zevex_first // marker: every Zevex_* case has a value strictly between Zevex_first and Zevex_last
   263  	Zevex_i_r_k_rm
   264  	Zevex_i_r_rm
   265  	Zevex_i_rm_k_r
   266  	Zevex_i_rm_k_vo
   267  	Zevex_i_rm_r
   268  	Zevex_i_rm_v_k_r
   269  	Zevex_i_rm_v_r
   270  	Zevex_i_rm_vo
   271  	Zevex_k_rmo
   272  	Zevex_r_k_rm
   273  	Zevex_r_v_k_rm
   274  	Zevex_r_v_rm
   275  	Zevex_rm_k_r
   276  	Zevex_rm_v_k_r
   277  	Zevex_rm_v_r
   278  	Zevex_last // marker: closes the Zevex_* range opened by Zevex_first
   279  
   280  	Zmax // one past the last Ztype
   281  )
   282  
   283  const ( // P*: values for Optab.prefix (ACMPPD, for one, also uses Pe as an op byte); R*: REX/EVEX register-extension bits
   284  	Px   = 0
   285  	Px1  = 1    // symbolic; exact value doesn't matter
   286  	P32  = 0x32 // 32-bit only
   287  	Pe   = 0x66 // operand escape
   288  	Pm   = 0x0f // 2byte opcode escape
   289  	Pq   = 0xff // both escapes: 66 0f
   290  	Pb   = 0xfe // byte operands
   291  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   292  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   293  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   294  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   295  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   296  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   297  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   298  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   299  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   300  	Pw   = 0x48 // Rex.w
   301  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   302  	Py   = 0x80 // defaults to 64-bit mode
   303  	Py1  = 0x81 // symbolic; exact value doesn't matter
   304  	Py3  = 0x83 // symbolic; exact value doesn't matter
   305  	Pavx = 0x84 // symbolic: exact value doesn't matter
   306  
   307  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   308  	Rxw     = 1 << 3 // =1, 64-bit operand size
   309  	Rxr     = 1 << 2 // extend modrm reg
   310  	Rxx     = 1 << 1 // extend sib index
   311  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   312  )
   313  
   314  const (
   315  	// Encoding for VEX prefix in tables.
   316  	// The P, L, and W fields are chosen to match
   317  	// their eventual locations in the VEX prefix bytes.
   318  
   319  	// Bit layout of the values defined below:
   320  	// P in bits 0-1, L in bit 2, M in bits 3-4, avxEscape in bit 6,
   321  	// and W in bit 7. The fields do not overlap.
   322  
   323  	// Using spare bit to make leading [E]VEX encoding byte different from
   324  	// 0x0f even if all other VEX fields are 0.
   325  	avxEscape = 1 << 6
   326  
   327  	// P field - 2 bits
   328  	vex66 = 1 << 0
   329  	vexF3 = 2 << 0
   330  	vexF2 = 3 << 0
   331  	// L field - 1 bit
   332  	vexLZ  = 0 << 2
   333  	vexLIG = 0 << 2
   334  	vex128 = 0 << 2
   335  	vex256 = 1 << 2
   336  	// W field - 1 bit
   337  	vexWIG = 0 << 7
   338  	vexW0  = 0 << 7
   339  	vexW1  = 1 << 7
   340  	// M field - 5 bits, but mostly reserved; we can store up to 3
   341  	vex0F   = 1 << 3
   342  	vex0F38 = 2 << 3
   343  	vex0F3A = 3 << 3
   344  )
   345  
   346  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means class a also counts as class b; set up in instinit
   347  
   348  var reg [MAXREG]int // one slot per register number; NOTE(review): filled outside this section — confirm meaning where it is set
   349  
   350  var regrex [MAXREG + 1]int // one slot more than reg; NOTE(review): presumably per-register REX bits (Rx*) — confirm where it is set
   351  
   352  var ynone = []ytab{ // matches instructions written with no operands (e.g. AAAA, ACLC in optab)
   353  	{Zlit, 1, argList{}},
   354  }
   355  
   356  var ytext = []ytab{ // pseudo-op forms: (Ymb, Ytextsize) and (Ymb, Yi32, Ytextsize)
   357  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   358  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   359  }
   360  
   361  var ynop = []ytab{ // pseudo-op forms with zero or one operand
   362  	{Zpseudo, 0, argList{}},
   363  	{Zpseudo, 0, argList{Yiauto}},
   364  	{Zpseudo, 0, argList{Yml}},
   365  	{Zpseudo, 0, argList{Yrf}},
   366  	{Zpseudo, 0, argList{Yxr}},
   367  	{Zpseudo, 0, argList{Yiauto}}, // NOTE(review): from here on the argLists repeat the four lines above, so these look unreachable — confirm before pruning
   368  	{Zpseudo, 0, argList{Yml}},
   369  	{Zpseudo, 0, argList{Yrf}},
   370  	{Zpseudo, 1, argList{Yxr}},
   371  }
   372  
   373  var yfuncdata = []ytab{ // pseudo-op form taking (Yi32, Ym)
   374  	{Zpseudo, 0, argList{Yi32, Ym}},
   375  }
   376  
   377  var ypcdata = []ytab{ // pseudo-op form taking two Yi32 operands
   378  	{Zpseudo, 0, argList{Yi32, Yi32}},
   379  }
   380  
   381  var yxorb = []ytab{ // byte forms: Yi32 into Yal or Ymb, then Yrb<->Ymb (used by AADCB, AADDB, AANDB)
   382  	{Zib_, 1, argList{Yi32, Yal}},
   383  	{Zibo_m, 2, argList{Yi32, Ymb}},
   384  	{Zr_m, 1, argList{Yrb, Ymb}},
   385  	{Zm_r, 1, argList{Ymb, Yrb}},
   386  }
   387  
   388  var yaddl = []ytab{ // Yml/Yrl forms; the Yi8 line is first, so a Yi8 operand matches it before the Yi32 lines (used by AADDL, AADCL, AANDL and their Q/W variants)
   389  	{Zibo_m, 2, argList{Yi8, Yml}},
   390  	{Zil_, 1, argList{Yi32, Yax}},
   391  	{Zilo_m, 2, argList{Yi32, Yml}},
   392  	{Zr_m, 1, argList{Yrl, Yml}},
   393  	{Zm_r, 1, argList{Yml, Yrl}},
   394  }
   395  
   396  var yincl = []ytab{ // one operand: Yrl via Z_rp, otherwise Yml via Zo_m
   397  	{Z_rp, 1, argList{Yrl}},
   398  	{Zo_m, 2, argList{Yml}},
   399  }
   400  
   401  var yincq = []ytab{ // one Yml operand via Zo_m only (no Z_rp register line, unlike yincl)
   402  	{Zo_m, 2, argList{Yml}},
   403  }
   404  
   405  var ycmpb = []ytab{ // yxorb's operand pairs with the two operands swapped (immediate second); used by ACMPB
   406  	{Z_ib, 1, argList{Yal, Yi32}},
   407  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   408  	{Zm_r, 1, argList{Ymb, Yrb}},
   409  	{Zr_m, 1, argList{Yrb, Ymb}},
   410  }
   411  
   412  var ycmpl = []ytab{ // yaddl's operand pairs with the two operands swapped (immediate second); used by ACMPL, ACMPQ
   413  	{Zm_ibo, 2, argList{Yml, Yi8}},
   414  	{Z_il, 1, argList{Yax, Yi32}},
   415  	{Zm_ilo, 2, argList{Yml, Yi32}},
   416  	{Zm_r, 1, argList{Yml, Yrl}},
   417  	{Zr_m, 1, argList{Yrl, Yml}},
   418  }
   419  
   420  var yshb = []ytab{ // first operand $1 (Yi1), Yu8 or Ycx; second operand Ymb
   421  	{Zo_m, 2, argList{Yi1, Ymb}},
   422  	{Zibo_m, 2, argList{Yu8, Ymb}},
   423  	{Zo_m, 2, argList{Ycx, Ymb}},
   424  }
   425  
   426  var yshl = []ytab{ // first operand $1 (Yi1), Yu8, Ycl or Ycx; second operand Yml
   427  	{Zo_m, 2, argList{Yi1, Yml}},
   428  	{Zibo_m, 2, argList{Yu8, Yml}},
   429  	{Zo_m, 2, argList{Ycl, Yml}},
   430  	{Zo_m, 2, argList{Ycx, Yml}},
   431  }
   432  
   433  var ytestl = []ytab{ // same lines as yaddl minus its leading Yi8 line
   434  	{Zil_, 1, argList{Yi32, Yax}},
   435  	{Zilo_m, 2, argList{Yi32, Yml}},
   436  	{Zr_m, 1, argList{Yrl, Yml}},
   437  	{Zm_r, 1, argList{Yml, Yrl}},
   438  }
   439  
   440  var ymovb = []ytab{ // Yrb<->Ymb, then Yi32 into Yrb (Zib_rp) or into Ymb (Zibo_m)
   441  	{Zr_m, 1, argList{Yrb, Ymb}},
   442  	{Zm_r, 1, argList{Ymb, Yrb}},
   443  	{Zib_rp, 1, argList{Yi32, Yrb}},
   444  	{Zibo_m, 2, argList{Yi32, Ymb}},
   445  }
   446  
   447  var ybtl = []ytab{ // (Yi8, Yml) and (Yrl, Yml); used by the ABT*, ABTC*, ABTR* and ABTS* entries
   448  	{Zibo_m, 2, argList{Yi8, Yml}},
   449  	{Zr_m, 1, argList{Yrl, Yml}},
   450  }
   451  
   452  var ymovw = []ytab{ // Yrl<->Yml, Yi32 into Yrl or Yml, and a final (Yiauto, Yrl) line via Zaut_r
   453  	{Zr_m, 1, argList{Yrl, Yml}},
   454  	{Zm_r, 1, argList{Yml, Yrl}},
   455  	{Zil_rp, 1, argList{Yi32, Yrl}},
   456  	{Zilo_m, 2, argList{Yi32, Yml}},
   457  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   458  }
   459  
   460  var ymovl = []ytab{ // ymovw's lines with MMX (Ymr) and XMM (Yxr) MOVD forms inserted before the final Zaut_r line
   461  	{Zr_m, 1, argList{Yrl, Yml}},
   462  	{Zm_r, 1, argList{Yml, Yrl}},
   463  	{Zil_rp, 1, argList{Yi32, Yrl}},
   464  	{Zilo_m, 2, argList{Yi32, Yml}},
   465  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   466  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   467  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   468  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   469  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   470  }
   471  
   472  var yret = []ytab{ // no operand, or a single Yi32 operand; both lines use Zo_iw
   473  	{Zo_iw, 1, argList{}},
   474  	{Zo_iw, 1, argList{Yi32}},
   475  }
   476  
   477  var ymovq = []ytab{ // MOVQ forms in two groups; each line's comment notes the opcode bytes it pairs with
   478  	// valid in 32-bit mode
   479  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   480  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   481  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   482  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   483  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   484  
   485  	// valid only in 64-bit mode, usually with 64-bit prefix
   486  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   487  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   488  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   489  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   490  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   491  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   492  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   493  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   494  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   495  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   496  }
   497  
   498  var ymovbe = []ytab{ // (Ym, Yrl) and (Yrl, Ym), each with zoffset 3
   499  	{Zlitm_r, 3, argList{Ym, Yrl}},
   500  	{Zlitr_m, 3, argList{Yrl, Ym}},
   501  }
   502  
   503  var ym_rl = []ytab{ // single (Ym, Yrl) line
   504  	{Zm_r, 1, argList{Ym, Yrl}},
   505  }
   506  
   507  var yrl_m = []ytab{ // single (Yrl, Ym) line; used by ABOUNDL, ABOUNDW
   508  	{Zr_m, 1, argList{Yrl, Ym}},
   509  }
   510  
   511  var ymb_rl = []ytab{ // single (Ymb, Yrl) line, encoded with Zmb_r
   512  	{Zmb_r, 1, argList{Ymb, Yrl}},
   513  }
   514  
   515  var yml_rl = []ytab{ // single (Yml, Yrl) line; used by e.g. ABSFL, AADCXL and the ACMOV* entries
   516  	{Zm_r, 1, argList{Yml, Yrl}},
   517  }
   518  
   519  var yrl_ml = []ytab{ // single (Yrl, Yml) line; used by AARPL
   520  	{Zr_m, 1, argList{Yrl, Yml}},
   521  }
   522  
   523  var yml_mb = []ytab{ // Yrb<->Ymb in both directions. NOTE(review): the name says "ml" but both lines use byte classes
   524  	{Zr_m, 1, argList{Yrb, Ymb}},
   525  	{Zm_r, 1, argList{Ymb, Yrb}},
   526  }
   527  
   528  var yrb_mb = []ytab{ // single (Yrb, Ymb) line
   529  	{Zr_m, 1, argList{Yrb, Ymb}},
   530  }
   531  
   532  var yxchg = []ytab{ // Yax paired with Yrl in either order (Z_rp / Zrp_), then Yrl<->Yml
   533  	{Z_rp, 1, argList{Yax, Yrl}},
   534  	{Zrp_, 1, argList{Yrl, Yax}},
   535  	{Zr_m, 1, argList{Yrl, Yml}},
   536  	{Zm_r, 1, argList{Yml, Yrl}},
   537  }
   538  
   539  var ydivl = []ytab{ // single Yml operand, encoded with Zm_o
   540  	{Zm_o, 2, argList{Yml}},
   541  }
   542  
   543  var ydivb = []ytab{ // single Ymb operand, encoded with Zm_o
   544  	{Zm_o, 2, argList{Ymb}},
   545  }
   546  
   547  var yimul = []ytab{ // one Yml operand, an immediate (Yi8 or Yi32) into Yrl, or (Yml, Yrl)
   548  	{Zm_o, 2, argList{Yml}},
   549  	{Zib_rr, 1, argList{Yi8, Yrl}},
   550  	{Zil_rr, 1, argList{Yi32, Yrl}},
   551  	{Zm_r, 2, argList{Yml, Yrl}},
   552  }
   553  
   554  var yimul3 = []ytab{ // three operands: an immediate (Yi8 or Yi32), Yml, Yrl
   555  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   556  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   557  }
   558  
   559  var ybyte = []ytab{ // single Yi64 operand; used by ABYTE
   560  	{Zbyte, 1, argList{Yi64}},
   561  }
   562  
   563  var yin = []ytab{ // a Yi32 operand (Zib_) or no operand (Zlit)
   564  	{Zib_, 1, argList{Yi32}},
   565  	{Zlit, 1, argList{}},
   566  }
   567  
   568  var yint = []ytab{ // single Yi32 operand, encoded with Zib_
   569  	{Zib_, 1, argList{Yi32}},
   570  }
   571  
   572  var ypushl = []ytab{ // one operand: Yrl, Ym, Yi8 or Yi32 (the Yi8 line precedes the Yi32 line)
   573  	{Zrp_, 1, argList{Yrl}},
   574  	{Zm_o, 2, argList{Ym}},
   575  	{Zib_, 1, argList{Yi8}},
   576  	{Zil_, 1, argList{Yi32}},
   577  }
   578  
   579  var ypopl = []ytab{ // one operand: Yrl (Z_rp) or Ym (Zo_m)
   580  	{Z_rp, 1, argList{Yrl}},
   581  	{Zo_m, 2, argList{Ym}},
   582  }
   583  
   584  var ywrfsbase = []ytab{ // single Yrl operand, encoded with Zm_o
   585  	{Zm_o, 2, argList{Yrl}},
   586  }
   587  
   588  var yrdrand = []ytab{ // single Yrl operand, encoded with Zo_m
   589  	{Zo_m, 2, argList{Yrl}},
   590  }
   591  
   592  var yclflush = []ytab{ // single Ym operand via Zo_m; used by ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT, ACLWB
   593  	{Zo_m, 2, argList{Ym}},
   594  }
   595  
   596  var ybswap = []ytab{ // single Yrl operand via Z_rp with zoffset 2; used by ABSWAPL, ABSWAPQ
   597  	{Z_rp, 2, argList{Yrl}},
   598  }
   599  
   600  var yscond = []ytab{ // single Ymb operand, encoded with Zo_m
   601  	{Zo_m, 2, argList{Ymb}},
   602  }
   603  
   604  var yjcond = []ytab{ // a Ybr operand alone, or preceded by a $0 (Yi0) or $1 (Yi1) operand
   605  	{Zbr, 0, argList{Ybr}},
   606  	{Zbr, 0, argList{Yi0, Ybr}},
   607  	{Zbr, 1, argList{Yi1, Ybr}},
   608  }
   609  
   610  var yloop = []ytab{ // single Ybr operand, encoded with Zloop
   611  	{Zloop, 1, argList{Ybr}},
   612  }
   613  
   614  var ycall = []ytab{ // call forms; used by obj.ACALL
   615  	{Zcallindreg, 0, argList{Yml}}, // zoffset 0: z does not advance, so the next line starts at the same op bytes
   616  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   617  	{Zcallind, 2, argList{Yindir}},
   618  	{Zcall, 0, argList{Ybr}},
   619  	{Zcallcon, 1, argList{Yi32}},
   620  }
   621  
   622  var yduff = []ytab{ // single Yi32 operand, encoded with Zcallduff
   623  	{Zcallduff, 1, argList{Yi32}},
   624  }
   625  
   626  var yjmp = []ytab{ // one operand: Yml (Zo_m64), Ybr (Zjmp) or Yi32 (Zjmpcon)
   627  	{Zo_m64, 2, argList{Yml}},
   628  	{Zjmp, 0, argList{Ybr}},
   629  	{Zjmpcon, 1, argList{Yi32}},
   630  }
   631  
   632  var yfmvd = []ytab{ // Yf0 paired with Ym or Yrf, in both directions
   633  	{Zm_o, 2, argList{Ym, Yf0}},
   634  	{Zo_m, 2, argList{Yf0, Ym}},
   635  	{Zm_o, 2, argList{Yrf, Yf0}},
   636  	{Zo_m, 2, argList{Yf0, Yrf}},
   637  }
   638  
   639  var yfmvdp = []ytab{ // Yf0 as the first operand; Ym or Yrf as the second
   640  	{Zo_m, 2, argList{Yf0, Ym}},
   641  	{Zo_m, 2, argList{Yf0, Yrf}},
   642  }
   643  
   644  var yfmvf = []ytab{ // Yf0 paired with Ym, in both directions
   645  	{Zm_o, 2, argList{Ym, Yf0}},
   646  	{Zo_m, 2, argList{Yf0, Ym}},
   647  }
   648  
   649  var yfmvx = []ytab{ // single (Ym, Yf0) line
   650  	{Zm_o, 2, argList{Ym, Yf0}},
   651  }
   652  
   653  var yfmvp = []ytab{ // single (Yf0, Ym) line
   654  	{Zo_m, 2, argList{Yf0, Ym}},
   655  }
   656  
   657  var yfcmv = []ytab{ // single (Yrf, Yf0) line
   658  	{Zm_o, 2, argList{Yrf, Yf0}},
   659  }
   660  
   661  var yfadd = []ytab{ // (Ym, Yf0), (Yrf, Yf0) and (Yf0, Yrf)
   662  	{Zm_o, 2, argList{Ym, Yf0}},
   663  	{Zm_o, 2, argList{Yrf, Yf0}},
   664  	{Zo_m, 2, argList{Yf0, Yrf}},
   665  }
   666  
   667  var yfxch = []ytab{ // Yf0 and Yrf in either order
   668  	{Zo_m, 2, argList{Yf0, Yrf}},
   669  	{Zm_o, 2, argList{Yrf, Yf0}},
   670  }
   671  
   672  var ycompp = []ytab{ // single (Yf0, Yrf) line
   673  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   674  }
   675  
   676  var ystsw = []ytab{ // one operand: Ym (Zo_m) or Yax (Zlit)
   677  	{Zo_m, 2, argList{Ym}},
   678  	{Zlit, 1, argList{Yax}},
   679  }
   680  
   681  var ysvrs_mo = []ytab{ // single Ym operand, encoded with Zm_o
   682  	{Zm_o, 2, argList{Ym}},
   683  }
   684  
   685  // unaryDst version of "ysvrs_mo": the same single Ym operand, encoded with Zo_m instead of Zm_o.
   686  var ysvrs_om = []ytab{
   687  	{Zo_m, 2, argList{Ym}},
   688  }
   689  
   690  var ymm = []ytab{ // (Ymm, Ymr) with zoffset 1, or (Yxm, Yxr) with zoffset 2
   691  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   692  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   693  }
   694  
   695  var yxm = []ytab{ // single (Yxm, Yxr) line via Zm_r_xm; used by e.g. AADDPD, AANDPS
   696  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   697  }
   698  
   699  var yxm_q4 = []ytab{ // same operands as yxm, but encoded with Zm_r
   700  	{Zm_r, 1, argList{Yxm, Yxr}},
   701  }
   702  
   703  var yxcvm1 = []ytab{ // Yxm first operand; second operand Yxr or Ymr
   704  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   705  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   706  }
   707  
   708  var yxcvm2 = []ytab{ // first operand Yxm or Ymm; Yxr second operand
   709  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   710  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   711  }
   712  
   713  var yxr = []ytab{ // single (Yxr, Yxr) line
   714  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   715  }
   716  
   717  var yxr_ml = []ytab{ // single (Yxr, Yml) line, encoded with Zr_m_xm
   718  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   719  }
   720  
   721  var ymr = []ytab{ // single (Ymr, Ymr) line
   722  	{Zm_r, 1, argList{Ymr, Ymr}},
   723  }
   724  
   725  var ymr_ml = []ytab{ // single (Ymr, Yml) line, encoded with Zr_m_xm
   726  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   727  }
   728  
   729  var yxcmpi = []ytab{ // (Yxm, Yxr, Yi8) with the immediate last; used by ACMPPD, ACMPPS, ACMPSD
   730  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   731  }
   732  
   733  var yxmov = []ytab{ // (Yxm, Yxr) and (Yxr, Yxm)
   734  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   735  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   736  }
   737  
   738  var yxcvfl = []ytab{ // (Yxm, Yrl) with zoffset 1
   739  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   740  }
   741  
   742  var yxcvlf = []ytab{ // (Yml, Yxr) with zoffset 1
   743  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   744  }
   745  
   746  var yxcvfq = []ytab{ // same operands as yxcvfl, with zoffset 2
   747  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   748  }
   749  
   750  var yxcvqf = []ytab{ // same operands as yxcvlf, with zoffset 2
   751  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   752  }
   753  
   754  var yps = []ytab{ // (Ymm, Ymr) and (Yxm, Yxr), each followed by a Yi8 form targeting the same register class
   755  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   756  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   757  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   758  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   759  }
   760  
   761  var yxrrl = []ytab{ // single (Yxr, Yrl) line, encoded with Zm_r
   762  	{Zm_r, 1, argList{Yxr, Yrl}},
   763  }
   764  
   765  var ymrxr = []ytab{ // (Ymr, Yxr) via Zm_r, or (Yxm, Yxr) via Zm_r_xm
   766  	{Zm_r, 1, argList{Ymr, Yxr}},
   767  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   768  }
   769  
   770  var ymshuf = []ytab{ // single (Yi8, Ymm, Ymr) line
   771  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   772  }
   773  
   774  var ymshufb = []ytab{ // single (Yxm, Yxr) line, encoded with Zm2_r
   775  	{Zm2_r, 2, argList{Yxm, Yxr}},
   776  }
   777  
   778  // It should never have more than 1 entry,
   779  // because some optab entries have opcode sequences that
   780  // are longer than 2 bytes (zoffset=2 here),
   781  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   782  // to name a few.
   783  var yxshuf = []ytab{
   784  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   785  }
   786  
   787  var yextrw = []ytab{ // (Yu8, Yxr) followed by Yrl (Zibm_r) or Yml (Zibr_m)
   788  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   789  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   790  }
   791  
   792  var yextr = []ytab{ // single (Yu8, Yxr, Ymm) line with zoffset 3
   793  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   794  }
   795  
   796  var yinsrw = []ytab{ // single (Yu8, Yml, Yxr) line
   797  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   798  }
   799  
   800  var yinsr = []ytab{ // single (Yu8, Ymm, Yxr) line with zoffset 3
   801  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   802  }
   803  
   804  var ypsdq = []ytab{ // single (Yi8, Yxr) line, encoded with Zibo_m
   805  	{Zibo_m, 2, argList{Yi8, Yxr}},
   806  }
   807  
   808  var ymskb = []ytab{ // Yxr (zoffset 2) or Ymr (zoffset 1) as first operand; Yrl second
   809  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   810  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   811  }
   812  
   813  var ycrc32l = []ytab{ // single (Yml, Yrl) line via Zlitm_r, with zoffset 0
   814  	{Zlitm_r, 0, argList{Yml, Yrl}},
   815  }
   816  
   817  var ycrc32b = []ytab{ // single (Ymb, Yrl) line via Zlitm_r, with zoffset 0
   818  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   819  }
   820  
   821  var yprefetch = []ytab{ // single Ym operand, encoded with Zm_o
   822  	{Zm_o, 2, argList{Ym}},
   823  }
   824  
   825  var yaes = []ytab{ // single (Yxm, Yxr) line, encoded with Zlitm_r
   826  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   827  }
   828  
   829  var yxbegin = []ytab{ // single Ybr operand, encoded with Zjmp
   830  	{Zjmp, 1, argList{Ybr}},
   831  }
   832  
   833  var yxabort = []ytab{ // single Yu8 operand, encoded with Zib_
   834  	{Zib_, 1, argList{Yu8}},
   835  }
   836  
   837  var ylddqu = []ytab{ // single (Ym, Yxr) line, encoded with Zm_r
   838  	{Zm_r, 1, argList{Ym, Yxr}},
   839  }
   840  
   841  var ypalignr = []ytab{ // single (Yu8, Yxm, Yxr) line; identical in content to yxshuf
   842  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   843  }
   844  
   845  var ysha256rnds2 = []ytab{ // (Yxr0, Yxm, Yxr): the first operand must be X0; zoffset 0
   846  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   847  }
   848  
   849  var yblendvpd = []ytab{ // (Yxr0, Yxm, Yxr): the first operand must be X0; encoded with Z_m_r
   850  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   851  }
   852  
   853  var ymmxmm0f38 = []ytab{ // (Ymm, Ymr) with zoffset 3, or (Yxm, Yxr) with zoffset 5
   854  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   855  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   856  }
   857  
   858  var yextractps = []ytab{ // (Yu2, Yxr, Yml): the immediate must fit in uint2
   859  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   860  }
   861  
   862  var ysha1rnds4 = []ytab{ // (Yu2, Yxm, Yxr): the immediate must fit in uint2
   863  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   864  }
   865  
   866  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   867  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   868  // to find the entry with the given p.As and then looks through the ytable for
   869  // that instruction (the second field in the optab struct) for a line whose
   870  // argList values match the Ytypes of the p.From and p.To operands.  The
   871  // function oclass computes the specific Ytype of an operand and then the set
   872  // of more general Ytypes that it satisfies is implied by the ycover table, set
   873  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   874  // from the more general 8-bit constants, but instinit says
   875  //
   876  //        ycover[Yi0*Ymax+Ys32] = 1
   877  //        ycover[Yi1*Ymax+Ys32] = 1
   878  //        ycover[Yi8*Ymax+Ys32] = 1
   879  //
   880  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   881  // if that's what an instruction can handle.
   882  //
   883  // In parallel with the scan through the ytable for the appropriate line, there
   884  // is a z pointer that starts out pointing at the strange magic byte list in
   885  // the Optab struct.  With each step past a non-matching ytable line, z
   886  // advances by the 2nd entry in the line (its zoffset).  When a matching line
   887  // is found, that z pointer has the extra data to use in laying down the
   888  // instruction bytes.  The actual bytes laid down are a function of the 1st
   889  // entry in the line (that is, the Ztype) and the z bytes.
   890  //
   891  // For example, let's look at AADDL.  The optab line says:
   892  //        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //        var yaddl = []ytab{
   896  //                {Zibo_m, 2, argList{Yi8, Yml}},
   897  //                {Zil_, 1, argList{Yi32, Yax}},
   898  //                {Zilo_m, 2, argList{Yi32, Yml}},
   899  //                {Zr_m, 1, argList{Yrl, Yml}},
   900  //                {Zm_r, 1, argList{Yml, Yrl}},
   901  //        }
   902  //
   903  // so there are 5 possible types of ADDL instruction that can be laid down, and
   904  // possible states used to lay them down (Ztype and z pointer, assuming z
   905  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   906  //
   907  //        Yi8, Yml -> Zibo_m, z (0x83, 00)
   908  //        Yi32, Yax -> Zil_, z+2 (0x05)
   909  //        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   910  //        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   911  //        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   912  //
   913  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   914  // relatively straightforward as this program goes.
   915  //
   916  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   917  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   918  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   919  // Zilo_m is the same but a long (32-bit) immediate.
   920  var optab =
   921  //	as, ytab, andproto, opcode
   922  [...]Optab{
   923  	{obj.AXXX, nil, 0, opBytes{}},
   924  	{AAAA, ynone, P32, opBytes{0x37}},
   925  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   926  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   927  	{AAAS, ynone, P32, opBytes{0x3f}},
   928  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   929  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   930  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   933  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   934  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   935  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   936  	{AADDPD, yxm, Pq, opBytes{0x58}},
   937  	{AADDPS, yxm, Pm, opBytes{0x58}},
   938  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   939  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   940  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   941  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   942  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   943  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   944  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   945  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   946  	{AADJSP, nil, 0, opBytes{}},
   947  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   948  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   949  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   950  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   951  	{AANDPD, yxm, Pq, opBytes{0x54}},
   952  	{AANDPS, yxm, Pm, opBytes{0x54}},
   953  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   954  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   956  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   957  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   958  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   959  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   960  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   961  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   962  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   963  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   964  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   965  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   966  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   967  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   968  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   969  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   970  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   971  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   972  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   973  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   974  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   975  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   976  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   977  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   978  	{ABYTE, ybyte, Px, opBytes{1}},
   979  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   980  	{ACBW, ynone, Pe, opBytes{0x98}},
   981  	{ACDQ, ynone, Px, opBytes{0x99}},
   982  	{ACDQE, ynone, Pw, opBytes{0x98}},
   983  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   984  	{ACLC, ynone, Px, opBytes{0xf8}},
   985  	{ACLD, ynone, Px, opBytes{0xfc}},
   986  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   987  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   988  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   989  	{ACLI, ynone, Px, opBytes{0xfa}},
   990  	{ACLTS, ynone, Pm, opBytes{0x06}},
   991  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   992  	{ACMC, ynone, Px, opBytes{0xf5}},
   993  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   994  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   995  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   996  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   997  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   998  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
   999  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1000  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1001  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1002  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1003  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1004  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1005  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1006  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1007  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1008  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1009  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1010  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1011  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1012  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1013  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1014  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1015  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1016  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1017  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1018  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1019  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1020  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1021  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1022  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1023  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1024  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1025  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1026  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1027  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1028  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1029  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1030  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1031  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1032  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1033  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1034  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1035  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1036  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1037  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1038  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1039  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1040  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1041  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1042  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1043  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1044  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1045  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1046  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1047  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1048  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1049  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1050  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1051  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1052  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1053  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1054  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1055  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1056  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1057  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1058  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1059  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1060  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1061  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1062  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1063  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1064  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1065  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1066  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1067  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1068  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1069  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1070  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1071  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1072  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1073  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1074  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1075  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1076  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1077  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1078  	{ACWD, ynone, Pe, opBytes{0x99}},
  1079  	{ACWDE, ynone, Px, opBytes{0x98}},
  1080  	{ACQO, ynone, Pw, opBytes{0x99}},
  1081  	{ADAA, ynone, P32, opBytes{0x27}},
  1082  	{ADAS, ynone, P32, opBytes{0x2f}},
  1083  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1084  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1085  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1086  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1087  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1088  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1089  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1090  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1091  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1092  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1093  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1094  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1095  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1096  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1097  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1098  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1099  	{AENTER, nil, 0, opBytes{}}, // botch
  1100  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1101  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1102  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1103  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1104  	{AHLT, ynone, Px, opBytes{0xf4}},
  1105  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1106  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1107  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1108  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1109  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1110  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1111  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1114  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1117  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1118  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1119  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1120  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1121  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1122  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1123  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1124  	{AINSL, ynone, Px, opBytes{0x6d}},
  1125  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1126  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1127  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1128  	{AINT, yint, Px, opBytes{0xcd}},
  1129  	{AINTO, ynone, P32, opBytes{0xce}},
  1130  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1131  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1132  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1133  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1134  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1135  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1136  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1137  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1138  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1139  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1140  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1141  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1142  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1143  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1144  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1145  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1146  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1147  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1148  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1149  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1150  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1151  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1152  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1153  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1154  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1155  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1156  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1157  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1158  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1159  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1160  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1161  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1162  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1163  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1164  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1165  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1166  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1167  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1168  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1169  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1170  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1171  	{ALODSL, ynone, Px, opBytes{0xad}},
  1172  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1173  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1174  	{ALONG, ybyte, Px, opBytes{4}},
  1175  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1176  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1177  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1178  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1179  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1180  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1181  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1182  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1183  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1184  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1185  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1186  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1187  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1188  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1189  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1190  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1191  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1192  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1193  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1194  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1195  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1196  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1197  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1198  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1199  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1200  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1201  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1202  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1203  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1204  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1205  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1206  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1207  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1208  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1209  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1210  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1211  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1212  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1213  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1214  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1215  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1216  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1217  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1218  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1219  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1220  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1221  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1222  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1223  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1224  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1225  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1226  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1227  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1228  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1229  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1230  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1231  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1232  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1233  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1234  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1235  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1236  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1237  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1238  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1239  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1240  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1241  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1242  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1243  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1244  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1245  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1246  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1247  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1248  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1249  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1250  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1251  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1252  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1253  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1254  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1255  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1256  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1257  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1258  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1259  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1260  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1261  	{AORPD, yxm, Pq, opBytes{0x56}},
  1262  	{AORPS, yxm, Pm, opBytes{0x56}},
  1263  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1264  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1265  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1266  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1267  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1268  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1269  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1270  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1271  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1272  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1273  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1274  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1275  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1276  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1277  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1278  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1279  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1280  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1281  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1282  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1283  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1284  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1285  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1286  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1287  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1288  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1289  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1290  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1291  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1292  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1293  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1294  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1295  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1296  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1297  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1298  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1299  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1300  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1301  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1302  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1303  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1304  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1305  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1306  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1307  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1308  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1309  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1310  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1311  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1312  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1313  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1314  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1315  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1316  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1317  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1318  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1319  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1320  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1321  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1322  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1323  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1324  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1325  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1326  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1327  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1328  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1329  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1330  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1331  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1332  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1333  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1334  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1335  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1336  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1337  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1338  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1339  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1340  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1341  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1342  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1343  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1344  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1345  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1346  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1347  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1348  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1349  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1350  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1351  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1352  	{APOPAL, ynone, P32, opBytes{0x61}},
  1353  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1354  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1355  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1356  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1357  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1358  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1359  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1360  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1361  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1362  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1363  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1364  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1365  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1366  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1367  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1368  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1369  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1370  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1371  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1372  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1373  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1374  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1375  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1376  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1377  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1378  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1379  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1380  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1381  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1382  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1383  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1384  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1385  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1386  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1387  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1388  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1389  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1390  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1391  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1392  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1393  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1394  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1395  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1396  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1397  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1398  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1399  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1400  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1401  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1402  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1403  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1404  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1405  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1406  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1409  	{AQUAD, ybyte, Px, opBytes{8}},
  1410  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1411  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1412  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1415  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1416  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1417  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1418  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{AREP, ynone, Px, opBytes{0xf3}},
  1421  	{AREPN, ynone, Px, opBytes{0xf2}},
  1422  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1423  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1424  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1425  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1426  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1427  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1428  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1431  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1432  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1435  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1436  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1437  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1438  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1439  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1442  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1443  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1446  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1447  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1450  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1451  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1452  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1453  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1454  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1455  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1456  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1457  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1458  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1459  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1460  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1461  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1462  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1463  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1464  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1465  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1466  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1467  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1468  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1469  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1470  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1471  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1474  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1475  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1478  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1479  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1480  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1481  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1482  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1483  	{ASTC, ynone, Px, opBytes{0xf9}},
  1484  	{ASTD, ynone, Px, opBytes{0xfd}},
  1485  	{ASTI, ynone, Px, opBytes{0xfb}},
  1486  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1487  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1488  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1489  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1490  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1491  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1492  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1493  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1494  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1495  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1496  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1497  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1498  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1499  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1500  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1501  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1502  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1503  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1506  	{obj.ATEXT, ytext, Px, opBytes{}},
  1507  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1508  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1509  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1510  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1511  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1512  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1513  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1514  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1515  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1516  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1517  	{AWORD, ybyte, Px, opBytes{2}},
  1518  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1519  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1520  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1521  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1523  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1524  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1525  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1526  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1527  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1528  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1530  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1531  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1532  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1533  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1534  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1535  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1536  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1537  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1538  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1539  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1540  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1541  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1542  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1543  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1544  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1545  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1546  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1547  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1548  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1549  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1550  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1551  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1552  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1553  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1554  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1555  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1556  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1557  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1558  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1559  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1560  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1561  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1562  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1563  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1564  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1565  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1566  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1567  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1568  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1569  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1570  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1571  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1572  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1573  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1574  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1575  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1576  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1577  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1578  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1579  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1580  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1581  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1582  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1583  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1584  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1585  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1586  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1587  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1588  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1589  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1590  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1591  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1592  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1593  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1594  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1595  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1596  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1597  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1598  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1599  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1600  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1601  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1602  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1603  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1604  	{AFFREE, nil, 0, opBytes{}},
  1605  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1606  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1607  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1608  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1609  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1610  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1611  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1612  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1613  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1614  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1615  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1616  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1617  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1618  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1619  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1620  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1621  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1622  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1623  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1624  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1625  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1626  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1627  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1628  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1629  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1630  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1631  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1632  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1633  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1634  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1635  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1636  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1637  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1638  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1639  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1640  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1641  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1642  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1643  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1644  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1645  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1646  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1647  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1649  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1650  	{AINVD, ynone, Pm, opBytes{0x08}},
  1651  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1652  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1653  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1654  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1655  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1656  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1657  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1658  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1659  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1660  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1661  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1662  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1663  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1664  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1665  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1666  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1667  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1668  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1669  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1670  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1671  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1672  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1673  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1674  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1676  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1677  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1678  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1679  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1680  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1681  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1682  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1683  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1684  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1685  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1686  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1687  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1688  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1689  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1690  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1691  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1692  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1693  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1694  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1695  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1696  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1697  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1698  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1699  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1700  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1701  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1702  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1703  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1704  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1705  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1706  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1707  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1708  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1709  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1710  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1711  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1712  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1713  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1715  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1716  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1717  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1718  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1719  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1720  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1721  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1722  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1723  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1724  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1725  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1726  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1727  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1728  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1729  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1730  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1732  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1733  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1734  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1735  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1736  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1737  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1738  	{AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1739  	{AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1740  	{AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1741  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1742  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1743  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1744  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1745  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1746  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1747  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1748  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1749  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1750  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1751  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1752  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1753  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1754  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1755  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1756  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1757  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1758  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1759  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1760  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1761  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1762  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1763  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1764  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1765  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1766  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1767  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1768  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1769  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1770  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1771  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1772  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1773  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1774  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1775  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1776  
  1777  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1778  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1779  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1780  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1781  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1782  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1783  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1784  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1785  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1786  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1787  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1788  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1789  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1790  
  1791  	{obj.AEND, nil, 0, opBytes{}},
  1792  	{0, nil, 0, opBytes{}},
  1793  }
  1794  
// opindex maps an opcode, masked with obj.AMask, to its encoding table entry.
// It is filled in by instinit from avxOptab and optab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1796  
  1797  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1798  // This happens on systems like Solaris that call .so functions instead of system calls.
  1799  // It does not seem to be necessary for any other systems. This is probably working
  1800  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1801  // what that bug is. And this does fix it.
  1802  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1803  	if ctxt.Headtype == objabi.Hsolaris {
  1804  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1805  		return strings.HasPrefix(s.Name, "libc_")
  1806  	}
  1807  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1808  }
  1809  
  1810  // single-instruction no-ops of various lengths.
  1811  // constructed by hand and disassembled with gdb to verify.
  1812  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1813  var nop = [][16]uint8{
  1814  	{0x90},
  1815  	{0x66, 0x90},
  1816  	{0x0F, 0x1F, 0x00},
  1817  	{0x0F, 0x1F, 0x40, 0x00},
  1818  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1819  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1820  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1821  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1822  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1823  }
  1824  
  1825  // Native Client rejects the repeated 0x66 prefix.
  1826  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1827  func fillnop(p []byte, n int) {
  1828  	var m int
  1829  
  1830  	for n > 0 {
  1831  		m = n
  1832  		if m > len(nop) {
  1833  			m = len(nop)
  1834  		}
  1835  		copy(p[:m], nop[m-1][:m])
  1836  		p = p[m:]
  1837  		n -= m
  1838  	}
  1839  }
  1840  
  1841  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1842  	s.Grow(int64(c) + int64(pad))
  1843  	fillnop(s.P[c:], int(pad))
  1844  	return c + pad
  1845  }
  1846  
  1847  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1848  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1849  		return l
  1850  	}
  1851  	return q
  1852  }
  1853  
  1854  // isJump returns whether p is a jump instruction.
  1855  // It is used to ensure that no standalone or macro-fused jump will straddle
  1856  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1857  func isJump(p *obj.Prog) bool {
  1858  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1859  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1860  }
  1861  
  1862  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1863  // jump. Otherwise, nil is returned.
  1864  func lookForJCC(p *obj.Prog) *obj.Prog {
  1865  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1866  	var q *obj.Prog
  1867  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1868  	}
  1869  
  1870  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1871  		return nil
  1872  	}
  1873  
  1874  	switch q.As {
  1875  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1876  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1877  	default:
  1878  		return nil
  1879  	}
  1880  
  1881  	return q
  1882  }
  1883  
  1884  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1885  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1886  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1887  func fusedJump(p *obj.Prog) (bool, uint8) {
  1888  	var fusedSize uint8
  1889  
  1890  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1891  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1892  	// need to be careful to insert any padding before the locks rather than directly after them.
  1893  
  1894  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1895  		fusedSize += p.Isize
  1896  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1897  		}
  1898  		if p == nil {
  1899  			return false, 0
  1900  		}
  1901  	}
  1902  	if p.As == ALOCK {
  1903  		fusedSize += p.Isize
  1904  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1905  		}
  1906  		if p == nil {
  1907  			return false, 0
  1908  		}
  1909  	}
  1910  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1911  
  1912  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1913  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1914  
  1915  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1916  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1917  
  1918  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1919  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1920  
  1921  	if !cmpAddSub && !testAnd && !incDec {
  1922  		return false, 0
  1923  	}
  1924  
  1925  	if !incDec {
  1926  		var argOne obj.AddrType
  1927  		var argTwo obj.AddrType
  1928  		if cmp {
  1929  			argOne = p.From.Type
  1930  			argTwo = p.To.Type
  1931  		} else {
  1932  			argOne = p.To.Type
  1933  			argTwo = p.From.Type
  1934  		}
  1935  		if argOne == obj.TYPE_REG {
  1936  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1937  				return false, 0
  1938  			}
  1939  		} else if argOne == obj.TYPE_MEM {
  1940  			if argTwo != obj.TYPE_REG {
  1941  				return false, 0
  1942  			}
  1943  		} else {
  1944  			return false, 0
  1945  		}
  1946  	}
  1947  
  1948  	fusedSize += p.Isize
  1949  	jmp := lookForJCC(p)
  1950  	if jmp == nil {
  1951  		return false, 0
  1952  	}
  1953  
  1954  	fusedSize += jmp.Isize
  1955  
  1956  	if testAnd {
  1957  		return true, fusedSize
  1958  	}
  1959  
  1960  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1961  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1962  		return false, 0
  1963  	}
  1964  
  1965  	if cmpAddSub {
  1966  		return true, fusedSize
  1967  	}
  1968  
  1969  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1970  		return false, 0
  1971  	}
  1972  
  1973  	return true, fusedSize
  1974  }
  1975  
// padJumpsCtx is the boundary, in bytes, that padJump keeps jumps from
// crossing or ending on. A value of 0 disables jump padding; makePjcCtx
// returns either 0 or 32.
type padJumpsCtx int32
  1977  
  1978  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1979  	// Disable jump padding on 32 bit builds by settting
  1980  	// padJumps to 0.
  1981  	if ctxt.Arch.Family == sys.I386 {
  1982  		return padJumpsCtx(0)
  1983  	}
  1984  
  1985  	// Disable jump padding for hand written assembly code.
  1986  	if ctxt.IsAsm {
  1987  		return padJumpsCtx(0)
  1988  	}
  1989  
  1990  	return padJumpsCtx(32)
  1991  }
  1992  
  1993  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1994  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1995  // not cross or end on a 32 byte boundary.
  1996  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  1997  	if pjc == 0 {
  1998  		return c
  1999  	}
  2000  
  2001  	var toPad int32
  2002  	fj, fjSize := fusedJump(p)
  2003  	mask := int32(pjc - 1)
  2004  	if fj {
  2005  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2006  			toPad = int32(pjc) - (c & mask)
  2007  		}
  2008  	} else if isJump(p) {
  2009  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2010  			toPad = int32(pjc) - (c & mask)
  2011  		}
  2012  	}
  2013  	if toPad <= 0 {
  2014  		return c
  2015  	}
  2016  
  2017  	return noppad(ctxt, s, c, toPad)
  2018  }
  2019  
  2020  // reAssemble is called if an instruction's size changes during assembly. If
  2021  // it does and the instruction is a standalone or a macro-fused jump we need to
  2022  // reassemble.
  2023  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2024  	if pjc == 0 {
  2025  		return false
  2026  	}
  2027  
  2028  	fj, _ := fusedJump(p)
  2029  	return fj || isJump(p)
  2030  }
  2031  
// nopPad records padding that span6 inserted in front of an instruction
// during assembly, so that a NOP Prog of the same size can be spliced into
// the instruction list once assembly has settled.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
  2036  
// span6 assembles the function held in s. It walks the Progs starting at
// s.Func().Text, assigns each one its Pc and Isize, writes the encoded bytes
// to s.P and finally sets s.Size. The whole function is laid out repeatedly
// until a pass completes without widening a short branch and without a padded
// jump changing size. newprog is handed to obj.MarkUnsafePoints at the end.
//
// It does nothing if s.P is already non-nil.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
		ctxt.Diag("-spectre=ret not supported on 386")
		ctxt.Retpoline = false // don't keep printing
	}

	pjc := makePjcCtx(ctxt)

	// s already has code bytes; do not assemble it again.
	if s.P != nil {
		return
	}

	// instinit sets ycover[0] when it builds the tables.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// First pass over the Progs: rewrite instructions into the form that
	// will actually be encoded.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch that has no target yet is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, an indirect CALL or JMP through a register
		// becomes a direct branch to the symbol runtime.retpoline<reg>.
		// Indirect branches through memory are reported as errors.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	// Second pass: count the instructions and initialize the branch flags.
	// A target q that already carries branchShort has been visited by this
	// loop (assuming Back starts out clear), so the branch goes backwards
	// and q is marked as a loop head.
	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes made so far
	var c int32 // current offset into s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Zero and drop the relocations added by the previous pass.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			// c0 is the offset before any padding is inserted in front of p.
			c0 := c
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				// v is the distance from c up to the next multiple of loopAlign.
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			// Each q on the p.Rel list was assembled earlier in this pass;
			// v is the displacement from the end of q to p.
			for q := p.Rel; q != nil; q = q.Forwd {
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// Out of range for a short branch: clear branchShort
					// on q and ask for another pass.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// The one-byte displacement is at offset 2 of the
					// instruction for JCXZL and XBEGIN, and at offset 1
					// for everything else.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// The four-byte little-endian displacement occupies
					// the last four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Give up if the layout has not settled after 1000 passes.
		n++
		if n > 1000 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Do not start another pass once new errors have been reported.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	// Each recorded pad becomes an ANOP Prog linked in directly after the
	// instruction that precedes the pad, with the pad size as its Isize.
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging aid: hex dump of the assembled bytes followed by
	// the relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}
}
  2233  
// instinit fills in the package-level lookup tables: opindex (opcode to table
// entry), ycover (indexed by a pair of operand classes) and reg/regrex
// (indexed by register number). It returns at once if ycover[0] is already
// set, so calling it more than once is harmless.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both instruction tables by opcode. A slot that is already
	// occupied means two entries share an opcode, which is reported as a
	// phase error (the later entry still overwrites the slot).
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to its terminating entry, whose as
	// field is 0.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// ycover is a Ymax-by-Ymax matrix stored flat; every assignment below
	// sets the cell for the pair (a, b) in ycover[a*Ymax+b].
	// NOTE(review): presumably a set cell means an operand of class a is
	// accepted where class b is required — confirm against the code that
	// reads ycover.

	// Every class is paired with itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Per-register tables. reg[i] receives a number in the range 0-7 derived
	// from the register's position within its family, or stays -1 if no
	// range below matches. regrex[i] receives extra prefix bits for some
	// registers.
	// NOTE(review): presumably reg holds the 3-bit register field used in
	// the instruction encoding and regrex the REX/EVEX extension bits —
	// confirm against the code that reads them.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		// Byte registers AL through R15B.
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			// SPB through DIB get 0x40, which sets none of the Rx* bits
			// used below.
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// AH through BH are numbered from 4 upwards.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		// AX through R15; R8 and above also get prefix bits.
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// The F, M and K families have eight registers each and get no
		// regrex bits.
		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		// The X, Y and Z families are handled in two halves of sixteen.
		// In the lower half only registers 8-15 get prefix bits; in the
		// upper half (16-31) every register gets RxrEvex and registers
		// 24-31 additionally get Rxr|Rxx|Rxb.
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			// Same test as the i >= REG_X0+8 form used for X and Y.
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		// CR8 through CR15 get only the Rxr bit; reg is not assigned here.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
  2467  
// isAndroid is true when the configured target OS (buildcfg.GOOS) is "android".
// prefixof and vaddr consult it when deciding how TLS references are encoded.
var isAndroid = buildcfg.GOOS == "android"
  2469  
  2470  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2471  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2472  		return 0
  2473  	}
  2474  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2475  		switch a.Reg {
  2476  		case REG_CS:
  2477  			return 0x2e
  2478  
  2479  		case REG_DS:
  2480  			return 0x3e
  2481  
  2482  		case REG_ES:
  2483  			return 0x26
  2484  
  2485  		case REG_FS:
  2486  			return 0x64
  2487  
  2488  		case REG_GS:
  2489  			return 0x65
  2490  
  2491  		case REG_TLS:
  2492  			// NOTE: Systems listed here should be only systems that
  2493  			// support direct TLS references like 8(TLS) implemented as
  2494  			// direct references from FS or GS. Systems that require
  2495  			// the initial-exec model, where you load the TLS base into
  2496  			// a register and then index from that register, do not reach
  2497  			// this code and should not be listed.
  2498  			if ctxt.Arch.Family == sys.I386 {
  2499  				switch ctxt.Headtype {
  2500  				default:
  2501  					if isAndroid {
  2502  						return 0x65 // GS
  2503  					}
  2504  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2505  
  2506  				case objabi.Hdarwin,
  2507  					objabi.Hdragonfly,
  2508  					objabi.Hfreebsd,
  2509  					objabi.Hnetbsd,
  2510  					objabi.Hopenbsd:
  2511  					return 0x65 // GS
  2512  				}
  2513  			}
  2514  
  2515  			switch ctxt.Headtype {
  2516  			default:
  2517  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2518  
  2519  			case objabi.Hlinux:
  2520  				if isAndroid {
  2521  					return 0x64 // FS
  2522  				}
  2523  
  2524  				if ctxt.Flag_shared {
  2525  					log.Fatalf("unknown TLS base register for linux with -shared")
  2526  				} else {
  2527  					return 0x64 // FS
  2528  				}
  2529  
  2530  			case objabi.Hdragonfly,
  2531  				objabi.Hfreebsd,
  2532  				objabi.Hnetbsd,
  2533  				objabi.Hopenbsd,
  2534  				objabi.Hsolaris:
  2535  				return 0x64 // FS
  2536  
  2537  			case objabi.Hdarwin:
  2538  				return 0x65 // GS
  2539  			}
  2540  		}
  2541  	}
  2542  
  2543  	if ctxt.Arch.Family == sys.I386 {
  2544  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2545  			// When building for inclusion into a shared library, an instruction of the form
  2546  			//     MOVL off(CX)(TLS*1), AX
  2547  			// becomes
  2548  			//     mov %gs:off(%ecx), %eax
  2549  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2550  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2551  			// a shared library the instruction it becomes
  2552  			//     mov 0x0(%ecx), %eax
  2553  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2554  			return 0x65 // GS
  2555  		}
  2556  		return 0
  2557  	}
  2558  
  2559  	switch a.Index {
  2560  	case REG_CS:
  2561  		return 0x2e
  2562  
  2563  	case REG_DS:
  2564  		return 0x3e
  2565  
  2566  	case REG_ES:
  2567  		return 0x26
  2568  
  2569  	case REG_TLS:
  2570  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2571  			// When building for inclusion into a shared library, an instruction of the form
  2572  			//     MOV off(CX)(TLS*1), AX
  2573  			// becomes
  2574  			//     mov %fs:off(%rcx), %rax
  2575  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2576  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2577  			// a shared library the instruction does not require a prefix.
  2578  			return 0x64
  2579  		}
  2580  
  2581  	case REG_FS:
  2582  		return 0x64
  2583  
  2584  	case REG_GS:
  2585  		return 0x65
  2586  	}
  2587  
  2588  	return 0
  2589  }
  2590  
  2591  // oclassRegList returns multisource operand class for addr.
  2592  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2593  	// TODO(quasilyte): when oclass register case is refactored into
  2594  	// lookup table, use it here to get register kind more easily.
  2595  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2596  
  2597  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2598  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2599  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2600  
  2601  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2602  	low := regIndex(int16(reg0))
  2603  	high := regIndex(int16(reg1))
  2604  
  2605  	if ctxt.Arch.Family == sys.I386 {
  2606  		if low >= 8 || high >= 8 {
  2607  			return Yxxx
  2608  		}
  2609  	}
  2610  
  2611  	switch high - low {
  2612  	case 3:
  2613  		switch {
  2614  		case regIsXmm(reg0) && regIsXmm(reg1):
  2615  			return YxrEvexMulti4
  2616  		case regIsYmm(reg0) && regIsYmm(reg1):
  2617  			return YyrEvexMulti4
  2618  		case regIsZmm(reg0) && regIsZmm(reg1):
  2619  			return YzrMulti4
  2620  		default:
  2621  			return Yxxx
  2622  		}
  2623  	default:
  2624  		return Yxxx
  2625  	}
  2626  }
  2627  
  2628  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2629  // For addr that is not V-mem returns (Yxxx, false).
  2630  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2631  	switch addr.Index {
  2632  	case REG_X0 + 0,
  2633  		REG_X0 + 1,
  2634  		REG_X0 + 2,
  2635  		REG_X0 + 3,
  2636  		REG_X0 + 4,
  2637  		REG_X0 + 5,
  2638  		REG_X0 + 6,
  2639  		REG_X0 + 7:
  2640  		return Yxvm, true
  2641  	case REG_X8 + 0,
  2642  		REG_X8 + 1,
  2643  		REG_X8 + 2,
  2644  		REG_X8 + 3,
  2645  		REG_X8 + 4,
  2646  		REG_X8 + 5,
  2647  		REG_X8 + 6,
  2648  		REG_X8 + 7:
  2649  		if ctxt.Arch.Family == sys.I386 {
  2650  			return Yxxx, true
  2651  		}
  2652  		return Yxvm, true
  2653  	case REG_X16 + 0,
  2654  		REG_X16 + 1,
  2655  		REG_X16 + 2,
  2656  		REG_X16 + 3,
  2657  		REG_X16 + 4,
  2658  		REG_X16 + 5,
  2659  		REG_X16 + 6,
  2660  		REG_X16 + 7,
  2661  		REG_X16 + 8,
  2662  		REG_X16 + 9,
  2663  		REG_X16 + 10,
  2664  		REG_X16 + 11,
  2665  		REG_X16 + 12,
  2666  		REG_X16 + 13,
  2667  		REG_X16 + 14,
  2668  		REG_X16 + 15:
  2669  		if ctxt.Arch.Family == sys.I386 {
  2670  			return Yxxx, true
  2671  		}
  2672  		return YxvmEvex, true
  2673  
  2674  	case REG_Y0 + 0,
  2675  		REG_Y0 + 1,
  2676  		REG_Y0 + 2,
  2677  		REG_Y0 + 3,
  2678  		REG_Y0 + 4,
  2679  		REG_Y0 + 5,
  2680  		REG_Y0 + 6,
  2681  		REG_Y0 + 7:
  2682  		return Yyvm, true
  2683  	case REG_Y8 + 0,
  2684  		REG_Y8 + 1,
  2685  		REG_Y8 + 2,
  2686  		REG_Y8 + 3,
  2687  		REG_Y8 + 4,
  2688  		REG_Y8 + 5,
  2689  		REG_Y8 + 6,
  2690  		REG_Y8 + 7:
  2691  		if ctxt.Arch.Family == sys.I386 {
  2692  			return Yxxx, true
  2693  		}
  2694  		return Yyvm, true
  2695  	case REG_Y16 + 0,
  2696  		REG_Y16 + 1,
  2697  		REG_Y16 + 2,
  2698  		REG_Y16 + 3,
  2699  		REG_Y16 + 4,
  2700  		REG_Y16 + 5,
  2701  		REG_Y16 + 6,
  2702  		REG_Y16 + 7,
  2703  		REG_Y16 + 8,
  2704  		REG_Y16 + 9,
  2705  		REG_Y16 + 10,
  2706  		REG_Y16 + 11,
  2707  		REG_Y16 + 12,
  2708  		REG_Y16 + 13,
  2709  		REG_Y16 + 14,
  2710  		REG_Y16 + 15:
  2711  		if ctxt.Arch.Family == sys.I386 {
  2712  			return Yxxx, true
  2713  		}
  2714  		return YyvmEvex, true
  2715  
  2716  	case REG_Z0 + 0,
  2717  		REG_Z0 + 1,
  2718  		REG_Z0 + 2,
  2719  		REG_Z0 + 3,
  2720  		REG_Z0 + 4,
  2721  		REG_Z0 + 5,
  2722  		REG_Z0 + 6,
  2723  		REG_Z0 + 7:
  2724  		return Yzvm, true
  2725  	case REG_Z8 + 0,
  2726  		REG_Z8 + 1,
  2727  		REG_Z8 + 2,
  2728  		REG_Z8 + 3,
  2729  		REG_Z8 + 4,
  2730  		REG_Z8 + 5,
  2731  		REG_Z8 + 6,
  2732  		REG_Z8 + 7,
  2733  		REG_Z8 + 8,
  2734  		REG_Z8 + 9,
  2735  		REG_Z8 + 10,
  2736  		REG_Z8 + 11,
  2737  		REG_Z8 + 12,
  2738  		REG_Z8 + 13,
  2739  		REG_Z8 + 14,
  2740  		REG_Z8 + 15,
  2741  		REG_Z8 + 16,
  2742  		REG_Z8 + 17,
  2743  		REG_Z8 + 18,
  2744  		REG_Z8 + 19,
  2745  		REG_Z8 + 20,
  2746  		REG_Z8 + 21,
  2747  		REG_Z8 + 22,
  2748  		REG_Z8 + 23:
  2749  		if ctxt.Arch.Family == sys.I386 {
  2750  			return Yxxx, true
  2751  		}
  2752  		return Yzvm, true
  2753  	}
  2754  
  2755  	return Yxxx, false
  2756  }
  2757  
  2758  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2759  	switch a.Type {
  2760  	case obj.TYPE_REGLIST:
  2761  		return oclassRegList(ctxt, a)
  2762  
  2763  	case obj.TYPE_NONE:
  2764  		return Ynone
  2765  
  2766  	case obj.TYPE_BRANCH:
  2767  		return Ybr
  2768  
  2769  	case obj.TYPE_INDIR:
  2770  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2771  			return Yindir
  2772  		}
  2773  		return Yxxx
  2774  
  2775  	case obj.TYPE_MEM:
  2776  		// Pseudo registers have negative index, but SP is
  2777  		// not pseudo on x86, hence REG_SP check is not redundant.
  2778  		if a.Index == REG_SP || a.Index < 0 {
  2779  			// Can't use FP/SB/PC/SP as the index register.
  2780  			return Yxxx
  2781  		}
  2782  
  2783  		if vmem, ok := oclassVMem(ctxt, a); ok {
  2784  			return vmem
  2785  		}
  2786  
  2787  		if ctxt.Arch.Family == sys.AMD64 {
  2788  			switch a.Name {
  2789  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2790  				// Global variables can't use index registers and their
  2791  				// base register is %rip (%rip is encoded as REG_NONE).
  2792  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2793  					return Yxxx
  2794  				}
  2795  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2796  				// These names must have a base of SP.  The old compiler
  2797  				// uses 0 for the base register. SSA uses REG_SP.
  2798  				if a.Reg != REG_SP && a.Reg != 0 {
  2799  					return Yxxx
  2800  				}
  2801  			case obj.NAME_NONE:
  2802  				// everything is ok
  2803  			default:
  2804  				// unknown name
  2805  				return Yxxx
  2806  			}
  2807  		}
  2808  		return Ym
  2809  
  2810  	case obj.TYPE_ADDR:
  2811  		switch a.Name {
  2812  		case obj.NAME_GOTREF:
  2813  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2814  			return Yxxx
  2815  
  2816  		case obj.NAME_EXTERN,
  2817  			obj.NAME_STATIC:
  2818  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2819  				return Yi32
  2820  			}
  2821  			return Yiauto // use pc-relative addressing
  2822  
  2823  		case obj.NAME_AUTO,
  2824  			obj.NAME_PARAM:
  2825  			return Yiauto
  2826  		}
  2827  
  2828  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2829  		// and got Yi32 in an earlier version of this code.
  2830  		// Keep doing that until we fix yduff etc.
  2831  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2832  			return Yi32
  2833  		}
  2834  
  2835  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2836  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2837  		}
  2838  		fallthrough
  2839  
  2840  	case obj.TYPE_CONST:
  2841  		if a.Sym != nil {
  2842  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2843  		}
  2844  
  2845  		v := a.Offset
  2846  		if ctxt.Arch.Family == sys.I386 {
  2847  			v = int64(int32(v))
  2848  		}
  2849  		switch {
  2850  		case v == 0:
  2851  			return Yi0
  2852  		case v == 1:
  2853  			return Yi1
  2854  		case v >= 0 && v <= 3:
  2855  			return Yu2
  2856  		case v >= 0 && v <= 127:
  2857  			return Yu7
  2858  		case v >= 0 && v <= 255:
  2859  			return Yu8
  2860  		case v >= -128 && v <= 127:
  2861  			return Yi8
  2862  		}
  2863  		if ctxt.Arch.Family == sys.I386 {
  2864  			return Yi32
  2865  		}
  2866  		l := int32(v)
  2867  		if int64(l) == v {
  2868  			return Ys32 // can sign extend
  2869  		}
  2870  		if v>>32 == 0 {
  2871  			return Yi32 // unsigned
  2872  		}
  2873  		return Yi64
  2874  
  2875  	case obj.TYPE_TEXTSIZE:
  2876  		return Ytextsize
  2877  	}
  2878  
  2879  	if a.Type != obj.TYPE_REG {
  2880  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2881  		return Yxxx
  2882  	}
  2883  
  2884  	switch a.Reg {
  2885  	case REG_AL:
  2886  		return Yal
  2887  
  2888  	case REG_AX:
  2889  		return Yax
  2890  
  2891  		/*
  2892  			case REG_SPB:
  2893  		*/
  2894  	case REG_BPB,
  2895  		REG_SIB,
  2896  		REG_DIB,
  2897  		REG_R8B,
  2898  		REG_R9B,
  2899  		REG_R10B,
  2900  		REG_R11B,
  2901  		REG_R12B,
  2902  		REG_R13B,
  2903  		REG_R14B,
  2904  		REG_R15B:
  2905  		if ctxt.Arch.Family == sys.I386 {
  2906  			return Yxxx
  2907  		}
  2908  		fallthrough
  2909  
  2910  	case REG_DL,
  2911  		REG_BL,
  2912  		REG_AH,
  2913  		REG_CH,
  2914  		REG_DH,
  2915  		REG_BH:
  2916  		return Yrb
  2917  
  2918  	case REG_CL:
  2919  		return Ycl
  2920  
  2921  	case REG_CX:
  2922  		return Ycx
  2923  
  2924  	case REG_DX, REG_BX:
  2925  		return Yrx
  2926  
  2927  	case REG_R8, // not really Yrl
  2928  		REG_R9,
  2929  		REG_R10,
  2930  		REG_R11,
  2931  		REG_R12,
  2932  		REG_R13,
  2933  		REG_R14,
  2934  		REG_R15:
  2935  		if ctxt.Arch.Family == sys.I386 {
  2936  			return Yxxx
  2937  		}
  2938  		fallthrough
  2939  
  2940  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2941  		if ctxt.Arch.Family == sys.I386 {
  2942  			return Yrl32
  2943  		}
  2944  		return Yrl
  2945  
  2946  	case REG_F0 + 0:
  2947  		return Yf0
  2948  
  2949  	case REG_F0 + 1,
  2950  		REG_F0 + 2,
  2951  		REG_F0 + 3,
  2952  		REG_F0 + 4,
  2953  		REG_F0 + 5,
  2954  		REG_F0 + 6,
  2955  		REG_F0 + 7:
  2956  		return Yrf
  2957  
  2958  	case REG_M0 + 0,
  2959  		REG_M0 + 1,
  2960  		REG_M0 + 2,
  2961  		REG_M0 + 3,
  2962  		REG_M0 + 4,
  2963  		REG_M0 + 5,
  2964  		REG_M0 + 6,
  2965  		REG_M0 + 7:
  2966  		return Ymr
  2967  
  2968  	case REG_X0:
  2969  		return Yxr0
  2970  
  2971  	case REG_X0 + 1,
  2972  		REG_X0 + 2,
  2973  		REG_X0 + 3,
  2974  		REG_X0 + 4,
  2975  		REG_X0 + 5,
  2976  		REG_X0 + 6,
  2977  		REG_X0 + 7,
  2978  		REG_X0 + 8,
  2979  		REG_X0 + 9,
  2980  		REG_X0 + 10,
  2981  		REG_X0 + 11,
  2982  		REG_X0 + 12,
  2983  		REG_X0 + 13,
  2984  		REG_X0 + 14,
  2985  		REG_X0 + 15:
  2986  		return Yxr
  2987  
  2988  	case REG_X0 + 16,
  2989  		REG_X0 + 17,
  2990  		REG_X0 + 18,
  2991  		REG_X0 + 19,
  2992  		REG_X0 + 20,
  2993  		REG_X0 + 21,
  2994  		REG_X0 + 22,
  2995  		REG_X0 + 23,
  2996  		REG_X0 + 24,
  2997  		REG_X0 + 25,
  2998  		REG_X0 + 26,
  2999  		REG_X0 + 27,
  3000  		REG_X0 + 28,
  3001  		REG_X0 + 29,
  3002  		REG_X0 + 30,
  3003  		REG_X0 + 31:
  3004  		return YxrEvex
  3005  
  3006  	case REG_Y0 + 0,
  3007  		REG_Y0 + 1,
  3008  		REG_Y0 + 2,
  3009  		REG_Y0 + 3,
  3010  		REG_Y0 + 4,
  3011  		REG_Y0 + 5,
  3012  		REG_Y0 + 6,
  3013  		REG_Y0 + 7,
  3014  		REG_Y0 + 8,
  3015  		REG_Y0 + 9,
  3016  		REG_Y0 + 10,
  3017  		REG_Y0 + 11,
  3018  		REG_Y0 + 12,
  3019  		REG_Y0 + 13,
  3020  		REG_Y0 + 14,
  3021  		REG_Y0 + 15:
  3022  		return Yyr
  3023  
  3024  	case REG_Y0 + 16,
  3025  		REG_Y0 + 17,
  3026  		REG_Y0 + 18,
  3027  		REG_Y0 + 19,
  3028  		REG_Y0 + 20,
  3029  		REG_Y0 + 21,
  3030  		REG_Y0 + 22,
  3031  		REG_Y0 + 23,
  3032  		REG_Y0 + 24,
  3033  		REG_Y0 + 25,
  3034  		REG_Y0 + 26,
  3035  		REG_Y0 + 27,
  3036  		REG_Y0 + 28,
  3037  		REG_Y0 + 29,
  3038  		REG_Y0 + 30,
  3039  		REG_Y0 + 31:
  3040  		return YyrEvex
  3041  
  3042  	case REG_Z0 + 0,
  3043  		REG_Z0 + 1,
  3044  		REG_Z0 + 2,
  3045  		REG_Z0 + 3,
  3046  		REG_Z0 + 4,
  3047  		REG_Z0 + 5,
  3048  		REG_Z0 + 6,
  3049  		REG_Z0 + 7:
  3050  		return Yzr
  3051  
  3052  	case REG_Z0 + 8,
  3053  		REG_Z0 + 9,
  3054  		REG_Z0 + 10,
  3055  		REG_Z0 + 11,
  3056  		REG_Z0 + 12,
  3057  		REG_Z0 + 13,
  3058  		REG_Z0 + 14,
  3059  		REG_Z0 + 15,
  3060  		REG_Z0 + 16,
  3061  		REG_Z0 + 17,
  3062  		REG_Z0 + 18,
  3063  		REG_Z0 + 19,
  3064  		REG_Z0 + 20,
  3065  		REG_Z0 + 21,
  3066  		REG_Z0 + 22,
  3067  		REG_Z0 + 23,
  3068  		REG_Z0 + 24,
  3069  		REG_Z0 + 25,
  3070  		REG_Z0 + 26,
  3071  		REG_Z0 + 27,
  3072  		REG_Z0 + 28,
  3073  		REG_Z0 + 29,
  3074  		REG_Z0 + 30,
  3075  		REG_Z0 + 31:
  3076  		if ctxt.Arch.Family == sys.I386 {
  3077  			return Yxxx
  3078  		}
  3079  		return Yzr
  3080  
  3081  	case REG_K0:
  3082  		return Yk0
  3083  
  3084  	case REG_K0 + 1,
  3085  		REG_K0 + 2,
  3086  		REG_K0 + 3,
  3087  		REG_K0 + 4,
  3088  		REG_K0 + 5,
  3089  		REG_K0 + 6,
  3090  		REG_K0 + 7:
  3091  		return Yknot0
  3092  
  3093  	case REG_CS:
  3094  		return Ycs
  3095  	case REG_SS:
  3096  		return Yss
  3097  	case REG_DS:
  3098  		return Yds
  3099  	case REG_ES:
  3100  		return Yes
  3101  	case REG_FS:
  3102  		return Yfs
  3103  	case REG_GS:
  3104  		return Ygs
  3105  	case REG_TLS:
  3106  		return Ytls
  3107  
  3108  	case REG_GDTR:
  3109  		return Ygdtr
  3110  	case REG_IDTR:
  3111  		return Yidtr
  3112  	case REG_LDTR:
  3113  		return Yldtr
  3114  	case REG_MSW:
  3115  		return Ymsw
  3116  	case REG_TASK:
  3117  		return Ytask
  3118  
  3119  	case REG_CR + 0:
  3120  		return Ycr0
  3121  	case REG_CR + 1:
  3122  		return Ycr1
  3123  	case REG_CR + 2:
  3124  		return Ycr2
  3125  	case REG_CR + 3:
  3126  		return Ycr3
  3127  	case REG_CR + 4:
  3128  		return Ycr4
  3129  	case REG_CR + 5:
  3130  		return Ycr5
  3131  	case REG_CR + 6:
  3132  		return Ycr6
  3133  	case REG_CR + 7:
  3134  		return Ycr7
  3135  	case REG_CR + 8:
  3136  		return Ycr8
  3137  
  3138  	case REG_DR + 0:
  3139  		return Ydr0
  3140  	case REG_DR + 1:
  3141  		return Ydr1
  3142  	case REG_DR + 2:
  3143  		return Ydr2
  3144  	case REG_DR + 3:
  3145  		return Ydr3
  3146  	case REG_DR + 4:
  3147  		return Ydr4
  3148  	case REG_DR + 5:
  3149  		return Ydr5
  3150  	case REG_DR + 6:
  3151  		return Ydr6
  3152  	case REG_DR + 7:
  3153  		return Ydr7
  3154  
  3155  	case REG_TR + 0:
  3156  		return Ytr0
  3157  	case REG_TR + 1:
  3158  		return Ytr1
  3159  	case REG_TR + 2:
  3160  		return Ytr2
  3161  	case REG_TR + 3:
  3162  		return Ytr3
  3163  	case REG_TR + 4:
  3164  		return Ytr4
  3165  	case REG_TR + 5:
  3166  		return Ytr5
  3167  	case REG_TR + 6:
  3168  		return Ytr6
  3169  	case REG_TR + 7:
  3170  		return Ytr7
  3171  	}
  3172  
  3173  	return Yxxx
  3174  }
  3175  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Bytes are stored in the fixed-size array buf; off is the number of bytes
// written so far and therefore the position of the next write. The write
// methods advance off but never grow the array.
//
// rexflag collects prefix bits while operands are encoded (asmandsz ORs
// regrex values into it). vexflag, evexflag and evex describe the encoding
// form of the instruction being assembled, as noted on the fields.
// rep, repn and lock presumably record pending REP/REPN/LOCK prefixes; they
// are not used in the nearby code, so confirm against their users.
type AsmBuf struct {
	buf      [100]byte
	off      int
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}
  3190  
  3191  // Put1 appends one byte to the end of the buffer.
  3192  func (ab *AsmBuf) Put1(x byte) {
  3193  	ab.buf[ab.off] = x
  3194  	ab.off++
  3195  }
  3196  
  3197  // Put2 appends two bytes to the end of the buffer.
  3198  func (ab *AsmBuf) Put2(x, y byte) {
  3199  	ab.buf[ab.off+0] = x
  3200  	ab.buf[ab.off+1] = y
  3201  	ab.off += 2
  3202  }
  3203  
  3204  // Put3 appends three bytes to the end of the buffer.
  3205  func (ab *AsmBuf) Put3(x, y, z byte) {
  3206  	ab.buf[ab.off+0] = x
  3207  	ab.buf[ab.off+1] = y
  3208  	ab.buf[ab.off+2] = z
  3209  	ab.off += 3
  3210  }
  3211  
  3212  // Put4 appends four bytes to the end of the buffer.
  3213  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3214  	ab.buf[ab.off+0] = x
  3215  	ab.buf[ab.off+1] = y
  3216  	ab.buf[ab.off+2] = z
  3217  	ab.buf[ab.off+3] = w
  3218  	ab.off += 4
  3219  }
  3220  
  3221  // PutInt16 writes v into the buffer using little-endian encoding.
  3222  func (ab *AsmBuf) PutInt16(v int16) {
  3223  	ab.buf[ab.off+0] = byte(v)
  3224  	ab.buf[ab.off+1] = byte(v >> 8)
  3225  	ab.off += 2
  3226  }
  3227  
  3228  // PutInt32 writes v into the buffer using little-endian encoding.
  3229  func (ab *AsmBuf) PutInt32(v int32) {
  3230  	ab.buf[ab.off+0] = byte(v)
  3231  	ab.buf[ab.off+1] = byte(v >> 8)
  3232  	ab.buf[ab.off+2] = byte(v >> 16)
  3233  	ab.buf[ab.off+3] = byte(v >> 24)
  3234  	ab.off += 4
  3235  }
  3236  
  3237  // PutInt64 writes v into the buffer using little-endian encoding.
  3238  func (ab *AsmBuf) PutInt64(v int64) {
  3239  	ab.buf[ab.off+0] = byte(v)
  3240  	ab.buf[ab.off+1] = byte(v >> 8)
  3241  	ab.buf[ab.off+2] = byte(v >> 16)
  3242  	ab.buf[ab.off+3] = byte(v >> 24)
  3243  	ab.buf[ab.off+4] = byte(v >> 32)
  3244  	ab.buf[ab.off+5] = byte(v >> 40)
  3245  	ab.buf[ab.off+6] = byte(v >> 48)
  3246  	ab.buf[ab.off+7] = byte(v >> 56)
  3247  	ab.off += 8
  3248  }
  3249  
  3250  // Put copies b into the buffer.
  3251  func (ab *AsmBuf) Put(b []byte) {
  3252  	copy(ab.buf[ab.off:], b)
  3253  	ab.off += len(b)
  3254  }
  3255  
  3256  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3257  // starting at specified offset (e.g. z counter value).
  3258  // Trailing 0 is not written.
  3259  //
  3260  // Intended to be used for literal Z cases.
  3261  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3262  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3263  	for int(op[offset]) != 0 {
  3264  		ab.Put1(byte(op[offset]))
  3265  		offset++
  3266  	}
  3267  }
  3268  
  3269  // Insert inserts b at offset i.
  3270  func (ab *AsmBuf) Insert(i int, b byte) {
  3271  	ab.off++
  3272  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3273  	ab.buf[i] = b
  3274  }
  3275  
  3276  // Last returns the byte at the end of the buffer.
  3277  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3278  
  3279  // Len returns the length of the buffer.
  3280  func (ab *AsmBuf) Len() int { return ab.off }
  3281  
  3282  // Bytes returns the contents of the buffer.
  3283  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3284  
  3285  // Reset empties the buffer.
  3286  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3287  
  3288  // At returns the byte at offset i.
  3289  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3290  
  3291  // asmidx emits SIB byte.
  3292  func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3293  	var i int
  3294  
  3295  	// X/Y index register is used in VSIB.
  3296  	switch index {
  3297  	default:
  3298  		goto bad
  3299  
  3300  	case REG_NONE:
  3301  		i = 4 << 3
  3302  		goto bas
  3303  
  3304  	case REG_R8,
  3305  		REG_R9,
  3306  		REG_R10,
  3307  		REG_R11,
  3308  		REG_R12,
  3309  		REG_R13,
  3310  		REG_R14,
  3311  		REG_R15,
  3312  		REG_X8,
  3313  		REG_X9,
  3314  		REG_X10,
  3315  		REG_X11,
  3316  		REG_X12,
  3317  		REG_X13,
  3318  		REG_X14,
  3319  		REG_X15,
  3320  		REG_X16,
  3321  		REG_X17,
  3322  		REG_X18,
  3323  		REG_X19,
  3324  		REG_X20,
  3325  		REG_X21,
  3326  		REG_X22,
  3327  		REG_X23,
  3328  		REG_X24,
  3329  		REG_X25,
  3330  		REG_X26,
  3331  		REG_X27,
  3332  		REG_X28,
  3333  		REG_X29,
  3334  		REG_X30,
  3335  		REG_X31,
  3336  		REG_Y8,
  3337  		REG_Y9,
  3338  		REG_Y10,
  3339  		REG_Y11,
  3340  		REG_Y12,
  3341  		REG_Y13,
  3342  		REG_Y14,
  3343  		REG_Y15,
  3344  		REG_Y16,
  3345  		REG_Y17,
  3346  		REG_Y18,
  3347  		REG_Y19,
  3348  		REG_Y20,
  3349  		REG_Y21,
  3350  		REG_Y22,
  3351  		REG_Y23,
  3352  		REG_Y24,
  3353  		REG_Y25,
  3354  		REG_Y26,
  3355  		REG_Y27,
  3356  		REG_Y28,
  3357  		REG_Y29,
  3358  		REG_Y30,
  3359  		REG_Y31,
  3360  		REG_Z8,
  3361  		REG_Z9,
  3362  		REG_Z10,
  3363  		REG_Z11,
  3364  		REG_Z12,
  3365  		REG_Z13,
  3366  		REG_Z14,
  3367  		REG_Z15,
  3368  		REG_Z16,
  3369  		REG_Z17,
  3370  		REG_Z18,
  3371  		REG_Z19,
  3372  		REG_Z20,
  3373  		REG_Z21,
  3374  		REG_Z22,
  3375  		REG_Z23,
  3376  		REG_Z24,
  3377  		REG_Z25,
  3378  		REG_Z26,
  3379  		REG_Z27,
  3380  		REG_Z28,
  3381  		REG_Z29,
  3382  		REG_Z30,
  3383  		REG_Z31:
  3384  		if ctxt.Arch.Family == sys.I386 {
  3385  			goto bad
  3386  		}
  3387  		fallthrough
  3388  
  3389  	case REG_AX,
  3390  		REG_CX,
  3391  		REG_DX,
  3392  		REG_BX,
  3393  		REG_BP,
  3394  		REG_SI,
  3395  		REG_DI,
  3396  		REG_X0,
  3397  		REG_X1,
  3398  		REG_X2,
  3399  		REG_X3,
  3400  		REG_X4,
  3401  		REG_X5,
  3402  		REG_X6,
  3403  		REG_X7,
  3404  		REG_Y0,
  3405  		REG_Y1,
  3406  		REG_Y2,
  3407  		REG_Y3,
  3408  		REG_Y4,
  3409  		REG_Y5,
  3410  		REG_Y6,
  3411  		REG_Y7,
  3412  		REG_Z0,
  3413  		REG_Z1,
  3414  		REG_Z2,
  3415  		REG_Z3,
  3416  		REG_Z4,
  3417  		REG_Z5,
  3418  		REG_Z6,
  3419  		REG_Z7:
  3420  		i = reg[index] << 3
  3421  	}
  3422  
  3423  	switch scale {
  3424  	default:
  3425  		goto bad
  3426  
  3427  	case 1:
  3428  		break
  3429  
  3430  	case 2:
  3431  		i |= 1 << 6
  3432  
  3433  	case 4:
  3434  		i |= 2 << 6
  3435  
  3436  	case 8:
  3437  		i |= 3 << 6
  3438  	}
  3439  
  3440  bas:
  3441  	switch base {
  3442  	default:
  3443  		goto bad
  3444  
  3445  	case REG_NONE: // must be mod=00
  3446  		i |= 5
  3447  
  3448  	case REG_R8,
  3449  		REG_R9,
  3450  		REG_R10,
  3451  		REG_R11,
  3452  		REG_R12,
  3453  		REG_R13,
  3454  		REG_R14,
  3455  		REG_R15:
  3456  		if ctxt.Arch.Family == sys.I386 {
  3457  			goto bad
  3458  		}
  3459  		fallthrough
  3460  
  3461  	case REG_AX,
  3462  		REG_CX,
  3463  		REG_DX,
  3464  		REG_BX,
  3465  		REG_SP,
  3466  		REG_BP,
  3467  		REG_SI,
  3468  		REG_DI:
  3469  		i |= reg[base]
  3470  	}
  3471  
  3472  	ab.Put1(byte(i))
  3473  	return
  3474  
  3475  bad:
  3476  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  3477  	ab.Put1(0)
  3478  }
  3479  
  3480  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3481  	var rel obj.Reloc
  3482  
  3483  	v := vaddr(ctxt, p, a, &rel)
  3484  	if rel.Siz != 0 {
  3485  		if rel.Siz != 4 {
  3486  			ctxt.Diag("bad reloc")
  3487  		}
  3488  		r := obj.Addrel(cursym)
  3489  		*r = rel
  3490  		r.Off = int32(p.Pc + int64(ab.Len()))
  3491  	}
  3492  
  3493  	ab.PutInt32(int32(v))
  3494  }
  3495  
  3496  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3497  	if r != nil {
  3498  		*r = obj.Reloc{}
  3499  	}
  3500  
  3501  	switch a.Name {
  3502  	case obj.NAME_STATIC,
  3503  		obj.NAME_GOTREF,
  3504  		obj.NAME_EXTERN:
  3505  		s := a.Sym
  3506  		if r == nil {
  3507  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3508  			log.Fatalf("reloc")
  3509  		}
  3510  
  3511  		if a.Name == obj.NAME_GOTREF {
  3512  			r.Siz = 4
  3513  			r.Type = objabi.R_GOTPCREL
  3514  		} else if useAbs(ctxt, s) {
  3515  			r.Siz = 4
  3516  			r.Type = objabi.R_ADDR
  3517  		} else {
  3518  			r.Siz = 4
  3519  			r.Type = objabi.R_PCREL
  3520  		}
  3521  
  3522  		r.Off = -1 // caller must fill in
  3523  		r.Sym = s
  3524  		r.Add = a.Offset
  3525  
  3526  		return 0
  3527  	}
  3528  
  3529  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3530  		if r == nil {
  3531  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3532  			log.Fatalf("reloc")
  3533  		}
  3534  
  3535  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3536  			r.Type = objabi.R_TLS_LE
  3537  			r.Siz = 4
  3538  			r.Off = -1 // caller must fill in
  3539  			r.Add = a.Offset
  3540  		}
  3541  		return 0
  3542  	}
  3543  
  3544  	return a.Offset
  3545  }
  3546  
  3547  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3548  	var base int
  3549  	var rel obj.Reloc
  3550  
  3551  	rex &= 0x40 | Rxr
  3552  	if a.Offset != int64(int32(a.Offset)) {
  3553  		// The rules are slightly different for 386 and AMD64,
  3554  		// mostly for historical reasons. We may unify them later,
  3555  		// but it must be discussed beforehand.
  3556  		//
  3557  		// For 64bit mode only LEAL is allowed to overflow.
  3558  		// It's how https://golang.org/cl/59630 made it.
  3559  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3560  		//
  3561  		// For 32bit mode rules are more permissive.
  3562  		// If offset fits uint32, it's permitted.
  3563  		// This is allowed for assembly that wants to use 32-bit hex
  3564  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3565  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3566  			(ctxt.Arch.Family != sys.AMD64 &&
  3567  				int64(uint32(a.Offset)) == a.Offset &&
  3568  				ab.rexflag&Rxw == 0)
  3569  		if !overflowOK {
  3570  			ctxt.Diag("offset too large in %s", p)
  3571  		}
  3572  	}
  3573  	v := int32(a.Offset)
  3574  	rel.Siz = 0
  3575  
  3576  	switch a.Type {
  3577  	case obj.TYPE_ADDR:
  3578  		if a.Name == obj.NAME_NONE {
  3579  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3580  		}
  3581  		if a.Index == REG_TLS {
  3582  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3583  		}
  3584  		goto bad
  3585  
  3586  	case obj.TYPE_REG:
  3587  		const regFirst = REG_AL
  3588  		const regLast = REG_Z31
  3589  		if a.Reg < regFirst || regLast < a.Reg {
  3590  			goto bad
  3591  		}
  3592  		if v != 0 {
  3593  			goto bad
  3594  		}
  3595  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3596  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3597  		return
  3598  	}
  3599  
  3600  	if a.Type != obj.TYPE_MEM {
  3601  		goto bad
  3602  	}
  3603  
  3604  	if a.Index != REG_NONE && a.Index != REG_TLS {
  3605  		base := int(a.Reg)
  3606  		switch a.Name {
  3607  		case obj.NAME_EXTERN,
  3608  			obj.NAME_GOTREF,
  3609  			obj.NAME_STATIC:
  3610  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3611  				goto bad
  3612  			}
  3613  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3614  				// The base register has already been set. It holds the PC
  3615  				// of this instruction returned by a PC-reading thunk.
  3616  				// See obj6.go:rewriteToPcrel.
  3617  			} else {
  3618  				base = REG_NONE
  3619  			}
  3620  			v = int32(vaddr(ctxt, p, a, &rel))
  3621  
  3622  		case obj.NAME_AUTO,
  3623  			obj.NAME_PARAM:
  3624  			base = REG_SP
  3625  		}
  3626  
  3627  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3628  		if base == REG_NONE {
  3629  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3630  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3631  			goto putrelv
  3632  		}
  3633  
  3634  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3635  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3636  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3637  			return
  3638  		}
  3639  
  3640  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3641  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3642  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3643  			ab.Put1(disp8)
  3644  			return
  3645  		}
  3646  
  3647  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3648  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3649  		goto putrelv
  3650  	}
  3651  
  3652  	base = int(a.Reg)
  3653  	switch a.Name {
  3654  	case obj.NAME_STATIC,
  3655  		obj.NAME_GOTREF,
  3656  		obj.NAME_EXTERN:
  3657  		if a.Sym == nil {
  3658  			ctxt.Diag("bad addr: %v", p)
  3659  		}
  3660  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3661  			// The base register has already been set. It holds the PC
  3662  			// of this instruction returned by a PC-reading thunk.
  3663  			// See obj6.go:rewriteToPcrel.
  3664  		} else {
  3665  			base = REG_NONE
  3666  		}
  3667  		v = int32(vaddr(ctxt, p, a, &rel))
  3668  
  3669  	case obj.NAME_AUTO,
  3670  		obj.NAME_PARAM:
  3671  		base = REG_SP
  3672  	}
  3673  
  3674  	if base == REG_TLS {
  3675  		v = int32(vaddr(ctxt, p, a, &rel))
  3676  	}
  3677  
  3678  	ab.rexflag |= regrex[base]&Rxb | rex
  3679  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3680  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3681  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3682  				ctxt.Diag("%v has offset against gotref", p)
  3683  			}
  3684  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3685  			goto putrelv
  3686  		}
  3687  
  3688  		// temporary
  3689  		ab.Put2(
  3690  			byte(0<<6|4<<0|r<<3), // sib present
  3691  			0<<6|4<<3|5<<0,       // DS:d32
  3692  		)
  3693  		goto putrelv
  3694  	}
  3695  
  3696  	if base == REG_SP || base == REG_R12 {
  3697  		if v == 0 {
  3698  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3699  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3700  			return
  3701  		}
  3702  
  3703  		if disp8, ok := toDisp8(v, p, ab); ok {
  3704  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3705  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3706  			ab.Put1(disp8)
  3707  			return
  3708  		}
  3709  
  3710  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3711  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3712  		goto putrelv
  3713  	}
  3714  
  3715  	if REG_AX <= base && base <= REG_R15 {
  3716  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
  3717  			rel = obj.Reloc{}
  3718  			rel.Type = objabi.R_TLS_LE
  3719  			rel.Siz = 4
  3720  			rel.Sym = nil
  3721  			rel.Add = int64(v)
  3722  			v = 0
  3723  		}
  3724  
  3725  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3726  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3727  			return
  3728  		}
  3729  
  3730  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3731  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3732  			return
  3733  		}
  3734  
  3735  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3736  		goto putrelv
  3737  	}
  3738  
  3739  	goto bad
  3740  
  3741  putrelv:
  3742  	if rel.Siz != 0 {
  3743  		if rel.Siz != 4 {
  3744  			ctxt.Diag("bad rel")
  3745  			goto bad
  3746  		}
  3747  
  3748  		r := obj.Addrel(cursym)
  3749  		*r = rel
  3750  		r.Off = int32(p.Pc + int64(ab.Len()))
  3751  	}
  3752  
  3753  	ab.PutInt32(v)
  3754  	return
  3755  
  3756  bad:
  3757  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3758  }
  3759  
  3760  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3761  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3762  }
  3763  
  3764  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3765  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3766  }
  3767  
  3768  func bytereg(a *obj.Addr, t *uint8) {
  3769  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3770  		a.Reg += REG_AL - REG_AX
  3771  		*t = 0
  3772  	}
  3773  }
  3774  
  3775  func unbytereg(a *obj.Addr, t *uint8) {
  3776  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3777  		a.Reg += REG_AX - REG_AL
  3778  		*t = 0
  3779  	}
  3780  }
  3781  
// Encoding kinds used as the fifth positional value of ymovtab entries.
// NOTE(review): the code that dispatches on these kinds lies outside this
// section; the notes below only describe which ymovtab entries use each kind.
const (
	movLit uint8 = iota // Like Zlit
	// Used by entries that move a special register to a Yml operand with a
	// single opcode byte (the "mov seg" group).
	movRegMem
	// Used by entries that move a Yml operand to a special register with a
	// single opcode byte (the "mov seg" group).
	movMemReg
	// Like movRegMem, for entries whose opcode starts with two bytes (0x0f, xx).
	movRegMem2op
	// Like movMemReg, for entries whose opcode starts with two bytes (0x0f, xx).
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	// Used by the double-shift (SHLx/SHRx) entries.
	movDoubleShift
	// Used by the "load TLS base" entries.
	movTLSReg
)
  3792  
// ymovtab lists the encodings that are described by a movtab entry:
// pushes and pops of segment registers, moves involving segment, control,
// debug and test registers, loads and stores of the descriptor-table,
// machine-status and task registers, double shifts, and the TLS base load.
//
// Each entry is written positionally: the instruction, three operand
// classes, one of the mov* encoding kinds, and up to four opcode bytes.
// NOTE(review): the movtab field names and the exact role of each operand
// class slot are declared outside this section; confirm against the type.
// The final all-zero entry presumably acts as a terminator.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3936  
  3937  func isax(a *obj.Addr) bool {
  3938  	switch a.Reg {
  3939  	case REG_AX, REG_AL, REG_AH:
  3940  		return true
  3941  	}
  3942  
  3943  	if a.Index == REG_AX {
  3944  		return true
  3945  	}
  3946  	return false
  3947  }
  3948  
  3949  func subreg(p *obj.Prog, from int, to int) {
  3950  	if false { /* debug['Q'] */
  3951  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3952  	}
  3953  
  3954  	if int(p.From.Reg) == from {
  3955  		p.From.Reg = int16(to)
  3956  		p.Ft = 0
  3957  	}
  3958  
  3959  	if int(p.To.Reg) == from {
  3960  		p.To.Reg = int16(to)
  3961  		p.Tt = 0
  3962  	}
  3963  
  3964  	if int(p.From.Index) == from {
  3965  		p.From.Index = int16(to)
  3966  		p.Ft = 0
  3967  	}
  3968  
  3969  	if int(p.To.Index) == from {
  3970  		p.To.Index = int16(to)
  3971  		p.Tt = 0
  3972  	}
  3973  
  3974  	if false { /* debug['Q'] */
  3975  		fmt.Printf("%v\n", p)
  3976  	}
  3977  }
  3978  
  3979  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3980  	switch op {
  3981  	case Pm, Pe, Pf2, Pf3:
  3982  		if osize != 1 {
  3983  			if op != Pm {
  3984  				ab.Put1(byte(op))
  3985  			}
  3986  			ab.Put1(Pm)
  3987  			z++
  3988  			op = int(o.op[z])
  3989  			break
  3990  		}
  3991  		fallthrough
  3992  
  3993  	default:
  3994  		if ab.Len() == 0 || ab.Last() != Pm {
  3995  			ab.Put1(Pm)
  3996  		}
  3997  	}
  3998  
  3999  	ab.Put1(byte(op))
  4000  	return z
  4001  }
  4002  
// bpduff1 is the raw machine code for the two instructions noted inline:
// it stores BP at -16(SP) and then points BP at that slot.
// NOTE(review): presumably emitted ahead of a Duff's-device call; the use
// site is outside this section — confirm.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
  4007  
// bpduff2 is the raw machine code for the instruction noted inline:
// it reloads BP from the location BP currently points to.
// NOTE(review): presumably the counterpart of bpduff1, emitted after the
// call; the use site is outside this section — confirm.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  4011  
// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Any of rm, v, r and k may be nil; a nil operand contributes no register
// bits to the prefix. The suffix options selected by p.Scond (zeroing,
// rounding, broadcast, SAE) are checked against what ab.evex permits and
// violations are reported through ctxt.Diag; the prefix is emitted regardless.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The R, X, B and R' bits are stored inverted: they start out as 1 and
	// are cleared when the corresponding register extension bit is set.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, X is cleared when rm.Reg carries the
		// RxrEvex bit; otherwise X reflects the index register's Rxx bit.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// vvvv is stored inverted as well.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	// Only the first matching option below is applied; each one sets the b bit.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// The rounding mode takes the place of the vector length in L'L.
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is inverted too; the index register of rm takes precedence over v.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		// The aaa field holds the register number of the K mask operand.
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
  4118  
  4119  // Emit VEX prefix and opcode byte.
  4120  // The three addresses are the r/m, vvvv, and reg fields.
  4121  // The reg and rm arguments appear in the same order as the
  4122  // arguments to asmand, which typically follows the call to asmvex.
  4123  // The final two arguments are the VEX prefix (see encoding above)
  4124  // and the opcode byte.
  4125  // For details about vex prefix see:
  4126  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4127  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4128  	ab.vexflag = true
  4129  	rexR := 0
  4130  	if r != nil {
  4131  		rexR = regrex[r.Reg] & Rxr
  4132  	}
  4133  	rexB := 0
  4134  	rexX := 0
  4135  	if rm != nil {
  4136  		rexB = regrex[rm.Reg] & Rxb
  4137  		rexX = regrex[rm.Index] & Rxx
  4138  	}
  4139  	vexM := (vex >> 3) & 0x7
  4140  	vexWLP := vex & 0x87
  4141  	vexV := byte(0)
  4142  	if v != nil {
  4143  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4144  	}
  4145  	vexV ^= 0xF
  4146  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4147  		// Can use 2-byte encoding.
  4148  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4149  	} else {
  4150  		// Must use 3-byte encoding.
  4151  		ab.Put3(0xc4,
  4152  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4153  			vexV<<3|vexWLP,
  4154  		)
  4155  	}
  4156  	ab.Put1(opcode)
  4157  }
  4158  
  4159  // regIndex returns register index that fits in 5 bits.
  4160  //
  4161  //	R         : 3 bit | legacy instructions     | N/A
  4162  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4163  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4164  //
  4165  // Examples:
  4166  //	REG_Z30 => 30
  4167  //	REG_X15 => 15
  4168  //	REG_R9  => 9
  4169  //	REG_AX  => 0
  4170  //
  4171  func regIndex(r int16) int {
  4172  	lower3bits := reg[r]
  4173  	high4bit := regrex[r] & Rxr << 1
  4174  	high5bit := regrex[r] & RxrEvex << 0
  4175  	return lower3bits | high4bit | high5bit
  4176  }
  4177  
  4178  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4179  // Reports errors via ctxt.
  4180  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4181  	// If any pair of the index, mask, or destination registers
  4182  	// are the same, illegal instruction trap (#UD) is triggered.
  4183  	index := regIndex(p.GetFrom3().Index)
  4184  	mask := regIndex(p.From.Reg)
  4185  	dest := regIndex(p.To.Reg)
  4186  	if dest == mask || dest == index || mask == index {
  4187  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4188  		return false
  4189  	}
  4190  
  4191  	return true
  4192  }
  4193  
  4194  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4195  // Reports errors via ctxt.
  4196  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4197  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4198  	// register is the same as index vector in VSIB.
  4199  	index := regIndex(p.From.Index)
  4200  	dest := regIndex(p.To.Reg)
  4201  	if dest == index {
  4202  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4203  		return false
  4204  	}
  4205  
  4206  	return true
  4207  }
  4208  
  4209  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4210  	o := opindex[p.As&obj.AMask]
  4211  
  4212  	if o == nil {
  4213  		ctxt.Diag("asmins: missing op %v", p)
  4214  		return
  4215  	}
  4216  
  4217  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4218  		ab.Put1(byte(pre))
  4219  	}
  4220  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4221  		ab.Put1(byte(pre))
  4222  	}
  4223  
  4224  	// Checks to warn about instruction/arguments combinations that
  4225  	// will unconditionally trigger illegal instruction trap (#UD).
  4226  	switch p.As {
  4227  	case AVGATHERDPD,
  4228  		AVGATHERQPD,
  4229  		AVGATHERDPS,
  4230  		AVGATHERQPS,
  4231  		AVPGATHERDD,
  4232  		AVPGATHERQD,
  4233  		AVPGATHERDQ,
  4234  		AVPGATHERQQ:
  4235  		// AVX512 gather requires explicit K mask.
  4236  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4237  			if !avx512gatherValid(ctxt, p) {
  4238  				return
  4239  			}
  4240  		} else {
  4241  			if !avx2gatherValid(ctxt, p) {
  4242  				return
  4243  			}
  4244  		}
  4245  	}
  4246  
  4247  	if p.Ft == 0 {
  4248  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4249  	}
  4250  	if p.Tt == 0 {
  4251  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4252  	}
  4253  
  4254  	ft := int(p.Ft) * Ymax
  4255  	var f3t int
  4256  	tt := int(p.Tt) * Ymax
  4257  
  4258  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4259  	z := 0
  4260  	var a *obj.Addr
  4261  	var l int
  4262  	var op int
  4263  	var q *obj.Prog
  4264  	var r *obj.Reloc
  4265  	var rel obj.Reloc
  4266  	var v int64
  4267  
  4268  	args := make([]int, 0, argListMax)
  4269  	if ft != Ynone*Ymax {
  4270  		args = append(args, ft)
  4271  	}
  4272  	for i := range p.RestArgs {
  4273  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4274  	}
  4275  	if tt != Ynone*Ymax {
  4276  		args = append(args, tt)
  4277  	}
  4278  
  4279  	for _, yt := range o.ytab {
  4280  		// ytab matching is purely args-based,
  4281  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4282  		// add EVEX-only filter that will reject non-EVEX matches.
  4283  		//
  4284  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4285  		// Without this rule, operands will lead to VEX-encoded form
  4286  		// and produce "c5b15813" encoding.
  4287  		if !yt.match(args) {
  4288  			// "xo" is always zero for VEX/EVEX encoded insts.
  4289  			z += int(yt.zoffset) + xo
  4290  		} else {
  4291  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4292  				// Do not signal error and continue to search
  4293  				// for matching EVEX-encoded form.
  4294  				z += int(yt.zoffset)
  4295  				continue
  4296  			}
  4297  
  4298  			switch o.prefix {
  4299  			case Px1: // first option valid only in 32-bit mode
  4300  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4301  					z += int(yt.zoffset) + xo
  4302  					continue
  4303  				}
  4304  			case Pq: // 16 bit escape and opcode escape
  4305  				ab.Put2(Pe, Pm)
  4306  
  4307  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4308  				ab.rexflag |= Pw
  4309  				ab.Put2(Pe, Pm)
  4310  
  4311  			case Pq4: // 66 0F 38
  4312  				ab.Put3(0x66, 0x0F, 0x38)
  4313  
  4314  			case Pq4w: // 66 0F 38 + REX.W
  4315  				ab.rexflag |= Pw
  4316  				ab.Put3(0x66, 0x0F, 0x38)
  4317  
  4318  			case Pq5: // F3 0F 38
  4319  				ab.Put3(0xF3, 0x0F, 0x38)
  4320  
  4321  			case Pq5w: //  F3 0F 38 + REX.W
  4322  				ab.rexflag |= Pw
  4323  				ab.Put3(0xF3, 0x0F, 0x38)
  4324  
  4325  			case Pf2, // xmm opcode escape
  4326  				Pf3:
  4327  				ab.Put2(o.prefix, Pm)
  4328  
  4329  			case Pef3:
  4330  				ab.Put3(Pe, Pf3, Pm)
  4331  
  4332  			case Pfw: // xmm opcode escape + REX.W
  4333  				ab.rexflag |= Pw
  4334  				ab.Put2(Pf3, Pm)
  4335  
  4336  			case Pm: // opcode escape
  4337  				ab.Put1(Pm)
  4338  
  4339  			case Pe: // 16 bit escape
  4340  				ab.Put1(Pe)
  4341  
  4342  			case Pw: // 64-bit escape
  4343  				if ctxt.Arch.Family != sys.AMD64 {
  4344  					ctxt.Diag("asmins: illegal 64: %v", p)
  4345  				}
  4346  				ab.rexflag |= Pw
  4347  
  4348  			case Pw8: // 64-bit escape if z >= 8
  4349  				if z >= 8 {
  4350  					if ctxt.Arch.Family != sys.AMD64 {
  4351  						ctxt.Diag("asmins: illegal 64: %v", p)
  4352  					}
  4353  					ab.rexflag |= Pw
  4354  				}
  4355  
  4356  			case Pb: // botch
  4357  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4358  					goto bad
  4359  				}
  4360  				// NOTE(rsc): This is probably safe to do always,
  4361  				// but when enabled it chooses different encodings
  4362  				// than the old cmd/internal/obj/i386 code did,
  4363  				// which breaks our "same bits out" checks.
  4364  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4365  				// in the original obj/i386, and it would encode
  4366  				// (using a valid, shorter form) as 3c 00 if we enabled
  4367  				// the call to bytereg here.
  4368  				if ctxt.Arch.Family == sys.AMD64 {
  4369  					bytereg(&p.From, &p.Ft)
  4370  					bytereg(&p.To, &p.Tt)
  4371  				}
  4372  
  4373  			case P32: // 32 bit but illegal if 64-bit mode
  4374  				if ctxt.Arch.Family == sys.AMD64 {
  4375  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4376  				}
  4377  
  4378  			case Py: // 64-bit only, no prefix
  4379  				if ctxt.Arch.Family != sys.AMD64 {
  4380  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4381  				}
  4382  
  4383  			case Py1: // 64-bit only if z < 1, no prefix
  4384  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4385  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4386  				}
  4387  
  4388  			case Py3: // 64-bit only if z < 3, no prefix
  4389  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4390  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4391  				}
  4392  			}
  4393  
  4394  			if z >= len(o.op) {
  4395  				log.Fatalf("asmins bad table %v", p)
  4396  			}
  4397  			op = int(o.op[z])
  4398  			if op == 0x0f {
  4399  				ab.Put1(byte(op))
  4400  				z++
  4401  				op = int(o.op[z])
  4402  			}
  4403  
  4404  			switch yt.zcase {
  4405  			default:
  4406  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4407  				return
  4408  
  4409  			case Zpseudo:
  4410  				break
  4411  
  4412  			case Zlit:
  4413  				ab.PutOpBytesLit(z, &o.op)
  4414  
  4415  			case Zlitr_m:
  4416  				ab.PutOpBytesLit(z, &o.op)
  4417  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4418  
  4419  			case Zlitm_r:
  4420  				ab.PutOpBytesLit(z, &o.op)
  4421  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4422  
  4423  			case Zlit_m_r:
  4424  				ab.PutOpBytesLit(z, &o.op)
  4425  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4426  
  4427  			case Zmb_r:
  4428  				bytereg(&p.From, &p.Ft)
  4429  				fallthrough
  4430  
  4431  			case Zm_r:
  4432  				ab.Put1(byte(op))
  4433  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4434  
  4435  			case Z_m_r:
  4436  				ab.Put1(byte(op))
  4437  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4438  
  4439  			case Zm2_r:
  4440  				ab.Put2(byte(op), o.op[z+1])
  4441  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4442  
  4443  			case Zm_r_xm:
  4444  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4445  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4446  
  4447  			case Zm_r_xm_nr:
  4448  				ab.rexflag = 0
  4449  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4450  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4451  
  4452  			case Zm_r_i_xm:
  4453  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4454  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4455  				ab.Put1(byte(p.To.Offset))
  4456  
  4457  			case Zibm_r, Zibr_m:
  4458  				ab.PutOpBytesLit(z, &o.op)
  4459  				if yt.zcase == Zibr_m {
  4460  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4461  				} else {
  4462  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4463  				}
  4464  				switch {
  4465  				default:
  4466  					ab.Put1(byte(p.From.Offset))
  4467  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4468  					ab.PutInt16(int16(p.From.Offset))
  4469  				case yt.args[0] == Yi32:
  4470  					ab.PutInt32(int32(p.From.Offset))
  4471  				}
  4472  
  4473  			case Zaut_r:
  4474  				ab.Put1(0x8d) // leal
  4475  				if p.From.Type != obj.TYPE_ADDR {
  4476  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4477  				}
  4478  				p.From.Type = obj.TYPE_MEM
  4479  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4480  				p.From.Type = obj.TYPE_ADDR
  4481  
  4482  			case Zm_o:
  4483  				ab.Put1(byte(op))
  4484  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4485  
  4486  			case Zr_m:
  4487  				ab.Put1(byte(op))
  4488  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4489  
  4490  			case Zvex:
  4491  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4492  
  4493  			case Zvex_rm_v_r:
  4494  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4495  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4496  
  4497  			case Zvex_rm_v_ro:
  4498  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4499  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4500  
  4501  			case Zvex_i_rm_vo:
  4502  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4503  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4504  				ab.Put1(byte(p.From.Offset))
  4505  
  4506  			case Zvex_i_r_v:
  4507  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4508  				regnum := byte(0x7)
  4509  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4510  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4511  				} else {
  4512  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4513  				}
  4514  				ab.Put1(o.op[z+2] | regnum)
  4515  				ab.Put1(byte(p.From.Offset))
  4516  
  4517  			case Zvex_i_rm_v_r:
  4518  				imm, from, from3, to := unpackOps4(p)
  4519  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4520  				ab.asmand(ctxt, cursym, p, from, to)
  4521  				ab.Put1(byte(imm.Offset))
  4522  
  4523  			case Zvex_i_rm_r:
  4524  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4525  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4526  				ab.Put1(byte(p.From.Offset))
  4527  
  4528  			case Zvex_v_rm_r:
  4529  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4530  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4531  
  4532  			case Zvex_r_v_rm:
  4533  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4534  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4535  
  4536  			case Zvex_rm_r_vo:
  4537  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4538  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4539  
  4540  			case Zvex_i_r_rm:
  4541  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4542  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4543  				ab.Put1(byte(p.From.Offset))
  4544  
  4545  			case Zvex_hr_rm_v_r:
  4546  				hr, from, from3, to := unpackOps4(p)
  4547  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4548  				ab.asmand(ctxt, cursym, p, from, to)
  4549  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4550  
  4551  			case Zevex_k_rmo:
  4552  				ab.evex = newEVEXBits(z, &o.op)
  4553  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4554  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4555  
  4556  			case Zevex_i_rm_vo:
  4557  				ab.evex = newEVEXBits(z, &o.op)
  4558  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4559  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4560  				ab.Put1(byte(p.From.Offset))
  4561  
  4562  			case Zevex_i_rm_k_vo:
  4563  				imm, from, kmask, to := unpackOps4(p)
  4564  				ab.evex = newEVEXBits(z, &o.op)
  4565  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4566  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4567  				ab.Put1(byte(imm.Offset))
  4568  
  4569  			case Zevex_i_r_rm:
  4570  				ab.evex = newEVEXBits(z, &o.op)
  4571  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4572  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4573  				ab.Put1(byte(p.From.Offset))
  4574  
  4575  			case Zevex_i_r_k_rm:
  4576  				imm, from, kmask, to := unpackOps4(p)
  4577  				ab.evex = newEVEXBits(z, &o.op)
  4578  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4579  				ab.asmand(ctxt, cursym, p, to, from)
  4580  				ab.Put1(byte(imm.Offset))
  4581  
  4582  			case Zevex_i_rm_r:
  4583  				ab.evex = newEVEXBits(z, &o.op)
  4584  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4585  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4586  				ab.Put1(byte(p.From.Offset))
  4587  
  4588  			case Zevex_i_rm_k_r:
  4589  				imm, from, kmask, to := unpackOps4(p)
  4590  				ab.evex = newEVEXBits(z, &o.op)
  4591  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4592  				ab.asmand(ctxt, cursym, p, from, to)
  4593  				ab.Put1(byte(imm.Offset))
  4594  
  4595  			case Zevex_i_rm_v_r:
  4596  				imm, from, from3, to := unpackOps4(p)
  4597  				ab.evex = newEVEXBits(z, &o.op)
  4598  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4599  				ab.asmand(ctxt, cursym, p, from, to)
  4600  				ab.Put1(byte(imm.Offset))
  4601  
  4602  			case Zevex_i_rm_v_k_r:
  4603  				imm, from, from3, kmask, to := unpackOps5(p)
  4604  				ab.evex = newEVEXBits(z, &o.op)
  4605  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4606  				ab.asmand(ctxt, cursym, p, from, to)
  4607  				ab.Put1(byte(imm.Offset))
  4608  
  4609  			case Zevex_r_v_rm:
  4610  				ab.evex = newEVEXBits(z, &o.op)
  4611  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4612  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4613  
  4614  			case Zevex_rm_v_r:
  4615  				ab.evex = newEVEXBits(z, &o.op)
  4616  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4617  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4618  
  4619  			case Zevex_rm_k_r:
  4620  				ab.evex = newEVEXBits(z, &o.op)
  4621  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4622  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4623  
  4624  			case Zevex_r_k_rm:
  4625  				ab.evex = newEVEXBits(z, &o.op)
  4626  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4627  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4628  
  4629  			case Zevex_rm_v_k_r:
  4630  				from, from3, kmask, to := unpackOps4(p)
  4631  				ab.evex = newEVEXBits(z, &o.op)
  4632  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4633  				ab.asmand(ctxt, cursym, p, from, to)
  4634  
  4635  			case Zevex_r_v_k_rm:
  4636  				from, from3, kmask, to := unpackOps4(p)
  4637  				ab.evex = newEVEXBits(z, &o.op)
  4638  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4639  				ab.asmand(ctxt, cursym, p, to, from)
  4640  
  4641  			case Zr_m_xm:
  4642  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4643  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4644  
  4645  			case Zr_m_xm_nr:
  4646  				ab.rexflag = 0
  4647  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4648  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4649  
  4650  			case Zo_m:
  4651  				ab.Put1(byte(op))
  4652  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4653  
  4654  			case Zcallindreg:
  4655  				r = obj.Addrel(cursym)
  4656  				r.Off = int32(p.Pc)
  4657  				r.Type = objabi.R_CALLIND
  4658  				r.Siz = 0
  4659  				fallthrough
  4660  
  4661  			case Zo_m64:
  4662  				ab.Put1(byte(op))
  4663  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4664  
  4665  			case Zm_ibo:
  4666  				ab.Put1(byte(op))
  4667  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4668  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4669  
  4670  			case Zibo_m:
  4671  				ab.Put1(byte(op))
  4672  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4673  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4674  
  4675  			case Zibo_m_xm:
  4676  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4677  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4678  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4679  
  4680  			case Z_ib, Zib_:
  4681  				if yt.zcase == Zib_ {
  4682  					a = &p.From
  4683  				} else {
  4684  					a = &p.To
  4685  				}
  4686  				ab.Put1(byte(op))
  4687  				if p.As == AXABORT {
  4688  					ab.Put1(o.op[z+1])
  4689  				}
  4690  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4691  
  4692  			case Zib_rp:
  4693  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4694  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4695  
  4696  			case Zil_rp:
  4697  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4698  				ab.Put1(byte(op + reg[p.To.Reg]))
  4699  				if o.prefix == Pe {
  4700  					v = vaddr(ctxt, p, &p.From, nil)
  4701  					ab.PutInt16(int16(v))
  4702  				} else {
  4703  					ab.relput4(ctxt, cursym, p, &p.From)
  4704  				}
  4705  
  4706  			case Zo_iw:
  4707  				ab.Put1(byte(op))
  4708  				if p.From.Type != obj.TYPE_NONE {
  4709  					v = vaddr(ctxt, p, &p.From, nil)
  4710  					ab.PutInt16(int16(v))
  4711  				}
  4712  
  4713  			case Ziq_rp:
  4714  				v = vaddr(ctxt, p, &p.From, &rel)
  4715  				l = int(v >> 32)
  4716  				if l == 0 && rel.Siz != 8 {
  4717  					ab.rexflag &^= (0x40 | Rxw)
  4718  
  4719  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4720  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4721  					if rel.Type != 0 {
  4722  						r = obj.Addrel(cursym)
  4723  						*r = rel
  4724  						r.Off = int32(p.Pc + int64(ab.Len()))
  4725  					}
  4726  
  4727  					ab.PutInt32(int32(v))
  4728  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4729  					ab.Put1(0xc7)
  4730  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4731  
  4732  					ab.PutInt32(int32(v)) // need all 8
  4733  				} else {
  4734  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4735  					ab.Put1(byte(op + reg[p.To.Reg]))
  4736  					if rel.Type != 0 {
  4737  						r = obj.Addrel(cursym)
  4738  						*r = rel
  4739  						r.Off = int32(p.Pc + int64(ab.Len()))
  4740  					}
  4741  
  4742  					ab.PutInt64(v)
  4743  				}
  4744  
  4745  			case Zib_rr:
  4746  				ab.Put1(byte(op))
  4747  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4748  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4749  
  4750  			case Z_il, Zil_:
  4751  				if yt.zcase == Zil_ {
  4752  					a = &p.From
  4753  				} else {
  4754  					a = &p.To
  4755  				}
  4756  				ab.Put1(byte(op))
  4757  				if o.prefix == Pe {
  4758  					v = vaddr(ctxt, p, a, nil)
  4759  					ab.PutInt16(int16(v))
  4760  				} else {
  4761  					ab.relput4(ctxt, cursym, p, a)
  4762  				}
  4763  
  4764  			case Zm_ilo, Zilo_m:
  4765  				ab.Put1(byte(op))
  4766  				if yt.zcase == Zilo_m {
  4767  					a = &p.From
  4768  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4769  				} else {
  4770  					a = &p.To
  4771  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4772  				}
  4773  
  4774  				if o.prefix == Pe {
  4775  					v = vaddr(ctxt, p, a, nil)
  4776  					ab.PutInt16(int16(v))
  4777  				} else {
  4778  					ab.relput4(ctxt, cursym, p, a)
  4779  				}
  4780  
  4781  			case Zil_rr:
  4782  				ab.Put1(byte(op))
  4783  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4784  				if o.prefix == Pe {
  4785  					v = vaddr(ctxt, p, &p.From, nil)
  4786  					ab.PutInt16(int16(v))
  4787  				} else {
  4788  					ab.relput4(ctxt, cursym, p, &p.From)
  4789  				}
  4790  
  4791  			case Z_rp:
  4792  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4793  				ab.Put1(byte(op + reg[p.To.Reg]))
  4794  
  4795  			case Zrp_:
  4796  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4797  				ab.Put1(byte(op + reg[p.From.Reg]))
  4798  
  4799  			case Zcallcon, Zjmpcon:
  4800  				if yt.zcase == Zcallcon {
  4801  					ab.Put1(byte(op))
  4802  				} else {
  4803  					ab.Put1(o.op[z+1])
  4804  				}
  4805  				r = obj.Addrel(cursym)
  4806  				r.Off = int32(p.Pc + int64(ab.Len()))
  4807  				r.Type = objabi.R_PCREL
  4808  				r.Siz = 4
  4809  				r.Add = p.To.Offset
  4810  				ab.PutInt32(0)
  4811  
  4812  			case Zcallind:
  4813  				ab.Put2(byte(op), o.op[z+1])
  4814  				r = obj.Addrel(cursym)
  4815  				r.Off = int32(p.Pc + int64(ab.Len()))
  4816  				if ctxt.Arch.Family == sys.AMD64 {
  4817  					r.Type = objabi.R_PCREL
  4818  				} else {
  4819  					r.Type = objabi.R_ADDR
  4820  				}
  4821  				r.Siz = 4
  4822  				r.Add = p.To.Offset
  4823  				r.Sym = p.To.Sym
  4824  				ab.PutInt32(0)
  4825  
  4826  			case Zcall, Zcallduff:
  4827  				if p.To.Sym == nil {
  4828  					ctxt.Diag("call without target")
  4829  					ctxt.DiagFlush()
  4830  					log.Fatalf("bad code")
  4831  				}
  4832  
  4833  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4834  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4835  				}
  4836  
  4837  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4838  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4839  					// (the call jumps into the middle of the function).
  4840  					// This makes it possible to see call sites for duffcopy/duffzero in
  4841  					// BP-based profiling tools like Linux perf (which is the
  4842  					// whole point of maintaining frame pointers in Go).
  4843  					// MOVQ BP, -16(SP)
  4844  					// LEAQ -16(SP), BP
  4845  					ab.Put(bpduff1)
  4846  				}
  4847  				ab.Put1(byte(op))
  4848  				r = obj.Addrel(cursym)
  4849  				r.Off = int32(p.Pc + int64(ab.Len()))
  4850  				r.Sym = p.To.Sym
  4851  				r.Add = p.To.Offset
  4852  				r.Type = objabi.R_CALL
  4853  				r.Siz = 4
  4854  				ab.PutInt32(0)
  4855  
  4856  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4857  					// Pop BP pushed above.
  4858  					// MOVQ 0(BP), BP
  4859  					ab.Put(bpduff2)
  4860  				}
  4861  
  4862  			// TODO: jump across functions needs reloc
  4863  			case Zbr, Zjmp, Zloop:
  4864  				if p.As == AXBEGIN {
  4865  					ab.Put1(byte(op))
  4866  				}
  4867  				if p.To.Sym != nil {
  4868  					if yt.zcase != Zjmp {
  4869  						ctxt.Diag("branch to ATEXT")
  4870  						ctxt.DiagFlush()
  4871  						log.Fatalf("bad code")
  4872  					}
  4873  
  4874  					ab.Put1(o.op[z+1])
  4875  					r = obj.Addrel(cursym)
  4876  					r.Off = int32(p.Pc + int64(ab.Len()))
  4877  					r.Sym = p.To.Sym
  4878  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4879  					// it can point to a trampoline instead of the destination itself.
  4880  					r.Type = objabi.R_CALL
  4881  					r.Siz = 4
  4882  					ab.PutInt32(0)
  4883  					break
  4884  				}
  4885  
  4886  				// Assumes q is in this function.
  4887  				// TODO: Check in input, preserve in brchain.
  4888  
  4889  				// Fill in backward jump now.
  4890  				q = p.To.Target()
  4891  
  4892  				if q == nil {
  4893  					ctxt.Diag("jmp/branch/loop without target")
  4894  					ctxt.DiagFlush()
  4895  					log.Fatalf("bad code")
  4896  				}
  4897  
  4898  				if p.Back&branchBackwards != 0 {
  4899  					v = q.Pc - (p.Pc + 2)
  4900  					if v >= -128 && p.As != AXBEGIN {
  4901  						if p.As == AJCXZL {
  4902  							ab.Put1(0x67)
  4903  						}
  4904  						ab.Put2(byte(op), byte(v))
  4905  					} else if yt.zcase == Zloop {
  4906  						ctxt.Diag("loop too far: %v", p)
  4907  					} else {
  4908  						v -= 5 - 2
  4909  						if p.As == AXBEGIN {
  4910  							v--
  4911  						}
  4912  						if yt.zcase == Zbr {
  4913  							ab.Put1(0x0f)
  4914  							v--
  4915  						}
  4916  
  4917  						ab.Put1(o.op[z+1])
  4918  						ab.PutInt32(int32(v))
  4919  					}
  4920  
  4921  					break
  4922  				}
  4923  
  4924  				// Annotate target; will fill in later.
  4925  				p.Forwd = q.Rel
  4926  
  4927  				q.Rel = p
  4928  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4929  					if p.As == AJCXZL {
  4930  						ab.Put1(0x67)
  4931  					}
  4932  					ab.Put2(byte(op), 0)
  4933  				} else if yt.zcase == Zloop {
  4934  					ctxt.Diag("loop too far: %v", p)
  4935  				} else {
  4936  					if yt.zcase == Zbr {
  4937  						ab.Put1(0x0f)
  4938  					}
  4939  					ab.Put1(o.op[z+1])
  4940  					ab.PutInt32(0)
  4941  				}
  4942  
  4943  			case Zbyte:
  4944  				v = vaddr(ctxt, p, &p.From, &rel)
  4945  				if rel.Siz != 0 {
  4946  					rel.Siz = uint8(op)
  4947  					r = obj.Addrel(cursym)
  4948  					*r = rel
  4949  					r.Off = int32(p.Pc + int64(ab.Len()))
  4950  				}
  4951  
  4952  				ab.Put1(byte(v))
  4953  				if op > 1 {
  4954  					ab.Put1(byte(v >> 8))
  4955  					if op > 2 {
  4956  						ab.PutInt16(int16(v >> 16))
  4957  						if op > 4 {
  4958  							ab.PutInt32(int32(v >> 32))
  4959  						}
  4960  					}
  4961  				}
  4962  			}
  4963  
  4964  			return
  4965  		}
  4966  	}
  4967  	f3t = Ynone * Ymax
  4968  	if p.GetFrom3() != nil {
  4969  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4970  	}
  4971  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4972  		var pp obj.Prog
  4973  		var t []byte
  4974  		if p.As == mo[0].as {
  4975  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4976  				t = mo[0].op[:]
  4977  				switch mo[0].code {
  4978  				default:
  4979  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4980  
  4981  				case movLit:
  4982  					for z = 0; t[z] != 0; z++ {
  4983  						ab.Put1(t[z])
  4984  					}
  4985  
  4986  				case movRegMem:
  4987  					ab.Put1(t[0])
  4988  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4989  
  4990  				case movMemReg:
  4991  					ab.Put1(t[0])
  4992  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4993  
  4994  				case movRegMem2op: // r,m - 2op
  4995  					ab.Put2(t[0], t[1])
  4996  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4997  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4998  
  4999  				case movMemReg2op:
  5000  					ab.Put2(t[0], t[1])
  5001  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5002  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5003  
  5004  				case movFullPtr:
  5005  					if t[0] != 0 {
  5006  						ab.Put1(t[0])
  5007  					}
  5008  					switch p.To.Index {
  5009  					default:
  5010  						goto bad
  5011  
  5012  					case REG_DS:
  5013  						ab.Put1(0xc5)
  5014  
  5015  					case REG_SS:
  5016  						ab.Put2(0x0f, 0xb2)
  5017  
  5018  					case REG_ES:
  5019  						ab.Put1(0xc4)
  5020  
  5021  					case REG_FS:
  5022  						ab.Put2(0x0f, 0xb4)
  5023  
  5024  					case REG_GS:
  5025  						ab.Put2(0x0f, 0xb5)
  5026  					}
  5027  
  5028  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5029  
  5030  				case movDoubleShift:
  5031  					if t[0] == Pw {
  5032  						if ctxt.Arch.Family != sys.AMD64 {
  5033  							ctxt.Diag("asmins: illegal 64: %v", p)
  5034  						}
  5035  						ab.rexflag |= Pw
  5036  						t = t[1:]
  5037  					} else if t[0] == Pe {
  5038  						ab.Put1(Pe)
  5039  						t = t[1:]
  5040  					}
  5041  
  5042  					switch p.From.Type {
  5043  					default:
  5044  						goto bad
  5045  
  5046  					case obj.TYPE_CONST:
  5047  						ab.Put2(0x0f, t[0])
  5048  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5049  						ab.Put1(byte(p.From.Offset))
  5050  
  5051  					case obj.TYPE_REG:
  5052  						switch p.From.Reg {
  5053  						default:
  5054  							goto bad
  5055  
  5056  						case REG_CL, REG_CX:
  5057  							ab.Put2(0x0f, t[1])
  5058  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5059  						}
  5060  					}
  5061  
  5062  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5063  				// where you load the TLS base register into a register and then index off that
  5064  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5065  				// are handled in prefixof above and should not be listed here.
  5066  				case movTLSReg:
  5067  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5068  						ctxt.Diag("invalid load of TLS: %v", p)
  5069  					}
  5070  
  5071  					if ctxt.Arch.Family == sys.I386 {
  5072  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5073  						// where you load the TLS base register into a register and then index off that
  5074  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5075  						// are handled in prefixof above and should not be listed here.
  5076  						switch ctxt.Headtype {
  5077  						default:
  5078  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5079  
  5080  						case objabi.Hlinux, objabi.Hfreebsd:
  5081  							if ctxt.Flag_shared {
  5082  								// Note that this is not generating the same insns as the other cases.
  5083  								//     MOV TLS, dst
  5084  								// becomes
  5085  								//     call __x86.get_pc_thunk.dst
  5086  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5087  								// which is encoded as
  5088  								//     call __x86.get_pc_thunk.dst
  5089  								//     movq 0(dst), dst
  5090  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5091  								// is g, which we can't check here, but will when we assemble the second
  5092  								// instruction.
  5093  								dst := p.To.Reg
  5094  								ab.Put1(0xe8)
  5095  								r = obj.Addrel(cursym)
  5096  								r.Off = int32(p.Pc + int64(ab.Len()))
  5097  								r.Type = objabi.R_CALL
  5098  								r.Siz = 4
  5099  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  5100  								ab.PutInt32(0)
  5101  
  5102  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5103  								r = obj.Addrel(cursym)
  5104  								r.Off = int32(p.Pc + int64(ab.Len()))
  5105  								r.Type = objabi.R_TLS_IE
  5106  								r.Siz = 4
  5107  								r.Add = 2
  5108  								ab.PutInt32(0)
  5109  							} else {
  5110  								// ELF TLS base is 0(GS).
  5111  								pp.From = p.From
  5112  
  5113  								pp.From.Type = obj.TYPE_MEM
  5114  								pp.From.Reg = REG_GS
  5115  								pp.From.Offset = 0
  5116  								pp.From.Index = REG_NONE
  5117  								pp.From.Scale = 0
  5118  								ab.Put2(0x65, // GS
  5119  									0x8B)
  5120  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5121  							}
  5122  						case objabi.Hplan9:
  5123  							pp.From = obj.Addr{}
  5124  							pp.From.Type = obj.TYPE_MEM
  5125  							pp.From.Name = obj.NAME_EXTERN
  5126  							pp.From.Sym = plan9privates
  5127  							pp.From.Offset = 0
  5128  							pp.From.Index = REG_NONE
  5129  							ab.Put1(0x8B)
  5130  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5131  
  5132  						case objabi.Hwindows:
  5133  							// Windows TLS base is always 0x14(FS).
  5134  							pp.From = p.From
  5135  
  5136  							pp.From.Type = obj.TYPE_MEM
  5137  							pp.From.Reg = REG_FS
  5138  							pp.From.Offset = 0x14
  5139  							pp.From.Index = REG_NONE
  5140  							pp.From.Scale = 0
  5141  							ab.Put2(0x64, // FS
  5142  								0x8B)
  5143  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5144  						}
  5145  						break
  5146  					}
  5147  
  5148  					switch ctxt.Headtype {
  5149  					default:
  5150  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5151  
  5152  					case objabi.Hlinux, objabi.Hfreebsd:
  5153  						if !ctxt.Flag_shared {
  5154  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5155  						}
  5156  						// Note that this is not generating the same insn as the other cases.
  5157  						//     MOV TLS, R_to
  5158  						// becomes
  5159  						//     movq g@gottpoff(%rip), R_to
  5160  						// which is encoded as
  5161  						//     movq 0(%rip), R_to
  5162  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5163  						// is g, which we can't check here, but will when we assemble the second
  5164  						// instruction.
  5165  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5166  
  5167  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5168  						r = obj.Addrel(cursym)
  5169  						r.Off = int32(p.Pc + int64(ab.Len()))
  5170  						r.Type = objabi.R_TLS_IE
  5171  						r.Siz = 4
  5172  						r.Add = -4
  5173  						ab.PutInt32(0)
  5174  
  5175  					case objabi.Hplan9:
  5176  						pp.From = obj.Addr{}
  5177  						pp.From.Type = obj.TYPE_MEM
  5178  						pp.From.Name = obj.NAME_EXTERN
  5179  						pp.From.Sym = plan9privates
  5180  						pp.From.Offset = 0
  5181  						pp.From.Index = REG_NONE
  5182  						ab.rexflag |= Pw
  5183  						ab.Put1(0x8B)
  5184  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5185  
  5186  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5187  						// TLS base is 0(FS).
  5188  						pp.From = p.From
  5189  
  5190  						pp.From.Type = obj.TYPE_MEM
  5191  						pp.From.Name = obj.NAME_NONE
  5192  						pp.From.Reg = REG_NONE
  5193  						pp.From.Offset = 0
  5194  						pp.From.Index = REG_NONE
  5195  						pp.From.Scale = 0
  5196  						ab.rexflag |= Pw
  5197  						ab.Put2(0x64, // FS
  5198  							0x8B)
  5199  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5200  
  5201  					case objabi.Hwindows:
  5202  						// Windows TLS base is always 0x28(GS).
  5203  						pp.From = p.From
  5204  
  5205  						pp.From.Type = obj.TYPE_MEM
  5206  						pp.From.Name = obj.NAME_NONE
  5207  						pp.From.Reg = REG_GS
  5208  						pp.From.Offset = 0x28
  5209  						pp.From.Index = REG_NONE
  5210  						pp.From.Scale = 0
  5211  						ab.rexflag |= Pw
  5212  						ab.Put2(0x65, // GS
  5213  							0x8B)
  5214  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5215  					}
  5216  				}
  5217  				return
  5218  			}
  5219  		}
  5220  	}
  5221  	goto bad
  5222  
  5223  bad:
  5224  	if ctxt.Arch.Family != sys.AMD64 {
  5225  		// here, the assembly has failed.
  5226  		// if it's a byte instruction that has
  5227  		// unaddressable registers, try to
  5228  		// exchange registers and reissue the
  5229  		// instruction with the operands renamed.
  5230  		pp := *p
  5231  
  5232  		unbytereg(&pp.From, &pp.Ft)
  5233  		unbytereg(&pp.To, &pp.Tt)
  5234  
  5235  		z := int(p.From.Reg)
  5236  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5237  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5238  			// For now, different to keep bit-for-bit compatibility.
  5239  			if ctxt.Arch.Family == sys.I386 {
  5240  				breg := byteswapreg(ctxt, &p.To)
  5241  				if breg != REG_AX {
  5242  					ab.Put1(0x87) // xchg lhs,bx
  5243  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5244  					subreg(&pp, z, breg)
  5245  					ab.doasm(ctxt, cursym, &pp)
  5246  					ab.Put1(0x87) // xchg lhs,bx
  5247  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5248  				} else {
  5249  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5250  					subreg(&pp, z, REG_AX)
  5251  					ab.doasm(ctxt, cursym, &pp)
  5252  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5253  				}
  5254  				return
  5255  			}
  5256  
  5257  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5258  				// We certainly don't want to exchange
  5259  				// with AX if the op is MUL or DIV.
  5260  				ab.Put1(0x87) // xchg lhs,bx
  5261  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5262  				subreg(&pp, z, REG_BX)
  5263  				ab.doasm(ctxt, cursym, &pp)
  5264  				ab.Put1(0x87) // xchg lhs,bx
  5265  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5266  			} else {
  5267  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5268  				subreg(&pp, z, REG_AX)
  5269  				ab.doasm(ctxt, cursym, &pp)
  5270  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5271  			}
  5272  			return
  5273  		}
  5274  
  5275  		z = int(p.To.Reg)
  5276  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5277  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5278  			// For now, different to keep bit-for-bit compatibility.
  5279  			if ctxt.Arch.Family == sys.I386 {
  5280  				breg := byteswapreg(ctxt, &p.From)
  5281  				if breg != REG_AX {
  5282  					ab.Put1(0x87) //xchg rhs,bx
  5283  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5284  					subreg(&pp, z, breg)
  5285  					ab.doasm(ctxt, cursym, &pp)
  5286  					ab.Put1(0x87) // xchg rhs,bx
  5287  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5288  				} else {
  5289  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5290  					subreg(&pp, z, REG_AX)
  5291  					ab.doasm(ctxt, cursym, &pp)
  5292  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5293  				}
  5294  				return
  5295  			}
  5296  
  5297  			if isax(&p.From) {
  5298  				ab.Put1(0x87) // xchg rhs,bx
  5299  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5300  				subreg(&pp, z, REG_BX)
  5301  				ab.doasm(ctxt, cursym, &pp)
  5302  				ab.Put1(0x87) // xchg rhs,bx
  5303  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5304  			} else {
  5305  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5306  				subreg(&pp, z, REG_AX)
  5307  				ab.doasm(ctxt, cursym, &pp)
  5308  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5309  			}
  5310  			return
  5311  		}
  5312  	}
  5313  
  5314  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5315  }
  5316  
  5317  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5318  // which is not referenced in a.
  5319  // If a is empty, it returns BX to account for MULB-like instructions
  5320  // that might use DX and AX.
  5321  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5322  	cana, canb, canc, cand := true, true, true, true
  5323  	if a.Type == obj.TYPE_NONE {
  5324  		cana, cand = false, false
  5325  	}
  5326  
  5327  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5328  		switch a.Reg {
  5329  		case REG_NONE:
  5330  			cana, cand = false, false
  5331  		case REG_AX, REG_AL, REG_AH:
  5332  			cana = false
  5333  		case REG_BX, REG_BL, REG_BH:
  5334  			canb = false
  5335  		case REG_CX, REG_CL, REG_CH:
  5336  			canc = false
  5337  		case REG_DX, REG_DL, REG_DH:
  5338  			cand = false
  5339  		}
  5340  	}
  5341  
  5342  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5343  		switch a.Index {
  5344  		case REG_AX:
  5345  			cana = false
  5346  		case REG_BX:
  5347  			canb = false
  5348  		case REG_CX:
  5349  			canc = false
  5350  		case REG_DX:
  5351  			cand = false
  5352  		}
  5353  	}
  5354  
  5355  	switch {
  5356  	case cana:
  5357  		return REG_AX
  5358  	case canb:
  5359  		return REG_BX
  5360  	case canc:
  5361  		return REG_CX
  5362  	case cand:
  5363  		return REG_DX
  5364  	default:
  5365  		ctxt.Diag("impossible byte register")
  5366  		ctxt.DiagFlush()
  5367  		log.Fatalf("bad code")
  5368  		return 0
  5369  	}
  5370  }
  5371  
  5372  func isbadbyte(a *obj.Addr) bool {
  5373  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5374  }
  5375  
  5376  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5377  	ab.Reset()
  5378  
  5379  	ab.rexflag = 0
  5380  	ab.vexflag = false
  5381  	ab.evexflag = false
  5382  	mark := ab.Len()
  5383  	ab.doasm(ctxt, cursym, p)
  5384  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5385  		// as befits the whole approach of the architecture,
  5386  		// the rex prefix must appear before the first opcode byte
  5387  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5388  		// before the 0f opcode escape!), or it might be ignored.
  5389  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5390  		if ctxt.Arch.Family != sys.AMD64 {
  5391  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5392  		}
  5393  		n := ab.Len()
  5394  		var np int
  5395  		for np = mark; np < n; np++ {
  5396  			c := ab.At(np)
  5397  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5398  				break
  5399  			}
  5400  		}
  5401  		ab.Insert(np, byte(0x40|ab.rexflag))
  5402  	}
  5403  
  5404  	n := ab.Len()
  5405  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5406  		r := &cursym.R[i]
  5407  		if int64(r.Off) < p.Pc {
  5408  			break
  5409  		}
  5410  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5411  			r.Off++
  5412  		}
  5413  		if r.Type == objabi.R_PCREL {
  5414  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5415  				// PC-relative addressing is relative to the end of the instruction,
  5416  				// but the relocations applied by the linker are relative to the end
  5417  				// of the relocation. Because immediate instruction
  5418  				// arguments can follow the PC-relative memory reference in the
  5419  				// instruction encoding, the two may not coincide. In this case,
  5420  				// adjust addend so that linker can keep relocating relative to the
  5421  				// end of the relocation.
  5422  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5423  			} else if ctxt.Arch.Family == sys.I386 {
  5424  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5425  				// assumes that the previous instruction loaded the PC of the end
  5426  				// of that instruction into CX, so the adjustment is relative to
  5427  				// that.
  5428  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5429  			}
  5430  		}
  5431  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5432  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5433  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5434  		}
  5435  
  5436  	}
  5437  }
  5438  
  5439  // unpackOps4 extracts 4 operands from p.
  5440  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5441  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5442  }
  5443  
  5444  // unpackOps5 extracts 5 operands from p.
  5445  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5446  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5447  }
  5448  

View as plain text