doc.go

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package arm64 implements an ARM64 assembler. Go assembly syntax is different from GNU ARM64
     7  syntax, but we can still follow the general rules to map between them.
     8  
     9  Instruction mnemonic mapping rules
    10  
    11  1. Most instructions use width suffixes of instruction names to indicate operand width rather than
    12  using different register names.
    13  
    14    Examples:
    15      ADC R24, R14, R12          <=>     adc x12, x14, x24
    16      ADDW R26->24, R21, R15     <=>     add w15, w21, w26, asr #24
    17      FCMPS F2, F3               <=>     fcmp s3, s2
    18      FCMPD F2, F3               <=>     fcmp d3, d2
    19      FCVTDH F2, F3              <=>     fcvt h3, d2
    20  
    21  2. Go uses .P and .W suffixes to indicate post-increment and pre-increment.
    22  
    23    Examples:
    24      MOVD.P -8(R10), R8         <=>      ldr x8, [x10],#-8
    25      MOVB.W 16(R16), R10        <=>      ldrsb x10, [x16,#16]!
    26      MOVBU.W 16(R16), R10       <=>      ldrb x10, [x16,#16]!
    27  
    28  3. Go uses a series of MOV instructions as load and store.
    29  
    30  64-bit variant ldr, str, stur => MOVD;
    31  32-bit variant str, stur, ldrsw => MOVW;
    32  32-bit variant ldr => MOVWU;
    33  ldrb => MOVBU; ldrh => MOVHU;
    34  ldrsb, sturb, strb => MOVB;
    35  ldrsh, sturh, strh =>  MOVH.
    36  
    37  4. Go moves conditions into opcode suffix, like BLT.
    38  
    39  5. Go adds a V prefix for most floating-point and SIMD instructions, except cryptographic extension
    40  instructions and floating-point(scalar) instructions.
    41  
    42    Examples:
    43      VADD V5.H8, V18.H8, V9.H8         <=>      add v9.8h, v18.8h, v5.8h
    44      VLD1.P (R6)(R11), [V31.D1]        <=>      ld1 {v31.1d}, [x6], x11
    45      VFMLA V29.S2, V20.S2, V14.S2      <=>      fmla v14.2s, v20.2s, v29.2s
    46      AESD V22.B16, V19.B16             <=>      aesd v19.16b, v22.16b
    47      SCVTFWS R3, F16                   <=>      scvtf s16, w3
    48  
    49  6. Align directive
    50  
    51  Go asm supports the PCALIGN directive, which indicates that the next instruction should be aligned
    52  to a specified boundary by padding with NOOP instructions. The alignment value supported on arm64
    53  must be a power of 2 and in the range of [8, 2048].
    54  
    55    Examples:
    56      PCALIGN $16
    57      MOVD $2, R0          // This instruction is aligned with 16 bytes.
    58      PCALIGN $1024
    59      MOVD $3, R1          // This instruction is aligned with 1024 bytes.
    60  
    61  PCALIGN also changes the function alignment. If a function has one or more PCALIGN directives,
    62  its address will be aligned to the same or coarser boundary, which is the maximum of all the
    63  alignment values.
    64  
    65  In the following example, the function Add is aligned with 128 bytes.
    66    Examples:
    67      TEXT ·Add(SB),$40-16
    68      MOVD $2, R0
    69      PCALIGN $32
    70      MOVD $4, R1
    71      PCALIGN $128
    72      MOVD $8, R2
    73      RET
    74  
    75  On arm64, functions in Go are aligned to 16 bytes by default. We can also use PCALIGN to set the
    76  function alignment. Functions that need to be aligned should preferably use NOFRAME and NOSPLIT
    77  to avoid the impact of the prologues inserted by the assembler, so that the function address will
    78  have the same alignment as the first hand-written instruction.
    79  
    80  In the following example, PCALIGN at the entry of the function Add will align its address to 2048 bytes.
    81  
    82    Examples:
    83      TEXT ·Add(SB),NOSPLIT|NOFRAME,$0
    84        PCALIGN $2048
    85        MOVD $1, R0
    86        MOVD $1, R1
    87        RET
    88  
    89  7. Move large constants to vector registers.
    90  
    91  Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively.
    92  For a 128-bit integer, VMOVQ takes two 64-bit operands: the low 64 bits first, then the high 64 bits.
    93  
    94    Examples:
    95      VMOVS $0x11223344, V0
    96      VMOVD $0x1122334455667788, V1
    97      VMOVQ $0x1122334455667788, $0x99aabbccddeeff00, V2   // V2=0x99aabbccddeeff001122334455667788
    98  
    99  8. Move an optionally-shifted 16-bit immediate value to a register.
   100  
   101  The instructions are MOVK(W), MOVZ(W) and MOVN(W); the assembly syntax is "op $(uimm16<<shift), <Rd>". The <uimm16>
   102  is the 16-bit unsigned immediate, in the range 0 to 65535. For the 32-bit variant, the <shift> is 0 or 16; for the
   103  64-bit variant, the <shift> is 0, 16, 32 or 48.
   104  
   105  The current Go assembler does not accept zero shifts, such as "op $0, Rd" and "op $(0<<(16|32|48)), Rd" instructions.
   106  
   107    Examples:
   108      MOVK $(10<<32), R20     <=>      movk x20, #10, lsl #32
   109      MOVZW $(20<<16), R8     <=>      movz w8, #20, lsl #16
   110      MOVK $(0<<16), R10 will be reported as an error by the assembler.
   111  
   112  Special Cases.
   113  
   114  (1) umov is written as VMOV.
   115  
   116  (2) br is renamed JMP, blr is renamed CALL.
   117  
   118  (3) No need to add "W" suffix: LDARB, LDARH, LDAXRB, LDAXRH, LDTRH, LDXRB, LDXRH.
   119  
   120  (4) In Go assembly syntax, NOP is a zero-width pseudo-instruction that serves a generic purpose and is
   121  unrelated to any real ARM64 instruction. NOOP stands for the hardware nop instruction. NOOP is an alias of
   122  HINT $0.
   123  
   124    Examples:
   125      VMOV V13.B[1], R20      <=>      umov w20, v13.b[1]
   126      VMOV V13.H[1], R20      <=>      umov w20, v13.h[1]
   127      JMP (R3)                <=>      br x3
   128      CALL (R17)              <=>      blr x17
   129      LDAXRB (R19), R16       <=>      ldaxrb w16, [x19]
   130      NOOP                    <=>      nop
   131  
   132  
   133  Register mapping rules
   134  
   135  1. All basic register names are written as Rn.
   136  
   137  2. Go uses ZR as the zero register and RSP as the stack pointer.
   138  
   139  3. Bn, Hn, Dn, Sn and Qn registers are written as Fn in floating-point instructions and as Vn
   140  in SIMD instructions.
   141  
   142  
   143  Argument mapping rules
   144  
   145  1. The operands appear in left-to-right assignment order.
   146  
   147  Go reverses the arguments of most instructions.
   148  
   149      Examples:
   150        ADD R11.SXTB<<1, RSP, R25      <=>      add x25, sp, w11, sxtb #1
   151        VADD V16, V19, V14             <=>      add d14, d19, d16
   152  
   153  Special Cases.
   154  
   155  (1) Argument order is the same as in the GNU ARM64 syntax: cbz, cbnz and some store instructions,
   156  such as str, stur, strb, sturb, strh, sturh, stlr, stlrb, stlrh, st1.
   157  
   158    Examples:
   159      MOVD R29, 384(R19)    <=>    str x29, [x19,#384]
   160      MOVB.P R30, 30(R4)    <=>    strb w30, [x4],#30
   161      STLRH R21, (R19)      <=>    stlrh w21, [x19]
   162  
   163  (2) MADD, MADDW, MSUB, MSUBW, SMADDL, SMSUBL, UMADDL, UMSUBL <Rm>, <Ra>, <Rn>, <Rd>
   164  
   165    Examples:
   166      MADD R2, R30, R22, R6       <=>    madd x6, x22, x2, x30
   167      SMSUBL R10, R3, R17, R27    <=>    smsubl x27, w17, w10, x3
   168  
   169  (3) FMADDD, FMADDS, FMSUBD, FMSUBS, FNMADDD, FNMADDS, FNMSUBD, FNMSUBS <Fm>, <Fa>, <Fn>, <Fd>
   170  
   171    Examples:
   172      FMADDD F30, F20, F3, F29    <=>    fmadd d29, d3, d30, d20
   173      FNMSUBS F7, F25, F7, F22    <=>    fnmsub s22, s7, s7, s25
   174  
   175  (4) BFI, BFXIL, SBFIZ, SBFX, UBFIZ, UBFX $<lsb>, <Rn>, $<width>, <Rd>
   176  
   177    Examples:
   178      BFIW $16, R20, $6, R0      <=>    bfi w0, w20, #16, #6
   179      UBFIZ $34, R26, $5, R20    <=>    ubfiz x20, x26, #34, #5
   180  
   181  (5) FCCMPD, FCCMPS, FCCMPED, FCCMPES <cond>, <Fm>, <Fn>, $<nzcv>
   182  
   183    Examples:
   184      FCCMPD AL, F8, F26, $0     <=>    fccmp d26, d8, #0x0, al
   185      FCCMPS VS, F29, F4, $4     <=>    fccmp s4, s29, #0x4, vs
   186      FCCMPED LE, F20, F5, $13   <=>    fccmpe d5, d20, #0xd, le
   187      FCCMPES NE, F26, F10, $0   <=>    fccmpe s10, s26, #0x0, ne
   188  
   189  (6) CCMN, CCMNW, CCMP, CCMPW <cond>, <Rn>, $<imm>, $<nzcv>
   190  
   191    Examples:
   192      CCMP MI, R22, $12, $13     <=>    ccmp x22, #0xc, #0xd, mi
   193      CCMNW AL, R1, $11, $8      <=>    ccmn w1, #0xb, #0x8, al
   194  
   195  (7) CCMN, CCMNW, CCMP, CCMPW <cond>, <Rn>, <Rm>, $<nzcv>
   196  
   197    Examples:
   198      CCMN VS, R13, R22, $10     <=>    ccmn x13, x22, #0xa, vs
   199      CCMPW HS, R19, R14, $11    <=>    ccmp w19, w14, #0xb, cs
   200  
   201  (9) CSEL, CSELW, CSNEG, CSNEGW, CSINC, CSINCW <cond>, <Rn>, <Rm>, <Rd> ;
   202  FCSELD, FCSELS <cond>, <Fn>, <Fm>, <Fd>
   203  
   204    Examples:
   205      CSEL GT, R0, R19, R1        <=>    csel x1, x0, x19, gt
   206      CSNEGW GT, R7, R17, R8      <=>    csneg w8, w7, w17, gt
   207      FCSELD EQ, F15, F18, F16    <=>    fcsel d16, d15, d18, eq
   208  
   209  (10) TBNZ, TBZ $<imm>, <Rt>, <label>
   210  
   211  
   212  (11) STLXR, STLXRW, STXR, STXRW, STLXRB, STLXRH, STXRB, STXRH  <Rf>, (<Rn|RSP>), <Rs>
   213  
   214    Examples:
   215      STLXR ZR, (R15), R16    <=>    stlxr w16, xzr, [x15]
   216      STXRB R9, (R21), R19    <=>    stxrb w19, w9, [x21]
   217  
   218  (12) STLXP, STLXPW, STXP, STXPW (<Rf1>, <Rf2>), (<Rn|RSP>), <Rs>
   219  
   220    Examples:
   221      STLXP (R17, R19), (R4), R5      <=>    stlxp w5, x17, x19, [x4]
   222      STXPW (R30, R25), (R22), R13    <=>    stxp w13, w30, w25, [x22]
   223  
   224  2. Expressions for special arguments.
   225  
   226  #<immediate> is written as $<immediate>.
   227  
   228  Optionally-shifted immediate.
   229  
   230    Examples:
   231      ADD $(3151<<12), R14, R20     <=>    add x20, x14, #0xc4f, lsl #12
   232      ADDW $1864, R25, R6           <=>    add w6, w25, #0x748
   233  
   234  Optionally-shifted registers are written as <Rm>{<shift><amount>}.
   235  The <shift> can be <<(lsl), >>(lsr), ->(asr), @>(ror).
   236  
   237    Examples:
   238      ADD R19>>30, R10, R24     <=>    add x24, x10, x19, lsr #30
   239      ADDW R26->24, R21, R15    <=>    add w15, w21, w26, asr #24
   240  
   241  Extended registers are written as <Rm>{.<extend>{<<<amount>}}.
   242  <extend> can be UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW or SXTX.
   243  
   244    Examples:
   245      ADDS R19.UXTB<<4, R9, R26     <=>    adds x26, x9, w19, uxtb #4
   246      ADDSW R14.SXTX, R14, R6       <=>    adds w6, w14, w14, sxtx
   247  
   248  Memory references: [<Xn|SP>{,#0}] is written as (Rn|RSP), a base register and an immediate
   249  offset is written as imm(Rn|RSP), a base register and an offset register is written as (Rn|RSP)(Rm).
   250  
   251    Examples:
   252      LDAR (R22), R9                  <=>    ldar x9, [x22]
   253      LDP 28(R17), (R15, R23)         <=>    ldp x15, x23, [x17,#28]
   254      MOVWU (R4)(R12<<2), R8          <=>    ldr w8, [x4, x12, lsl #2]
   255      MOVD (R7)(R11.UXTW<<3), R25     <=>    ldr x25, [x7,w11,uxtw #3]
   256      MOVBU (R27)(R23), R14           <=>    ldrb w14, [x27,x23]
   257  
   258  Register pairs are written as (Rt1, Rt2).
   259  
   260    Examples:
   261      LDP.P -240(R11), (R12, R26)    <=>    ldp x12, x26, [x11],#-240
   262  
   263  Register with arrangement and register with arrangement and index.
   264  
   265    Examples:
   266      VADD V5.H8, V18.H8, V9.H8                     <=>    add v9.8h, v18.8h, v5.8h
   267      VLD1 (R2), [V21.B16]                          <=>    ld1 {v21.16b}, [x2]
   268      VST1.P V9.S[1], (R16)(R21)                    <=>    st1 {v9.s}[1], [x16], x21
   269      VST1.P [V13.H8, V14.H8, V15.H8], (R3)(R14)    <=>    st1 {v13.8h-v15.8h}, [x3], x14
   270      VST1.P [V14.D1, V15.D1], (R7)(R23)            <=>    st1 {v14.1d, v15.1d}, [x7], x23
   271  */
   272  package arm64
   273
View as plain text