package amd64

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
	"internal/buildcfg"
)

// Plan 9 note handlers cannot use SSE registers, so the
// MOVUPS-based zeroing strategies are avoided there.
var isPlan9 = buildcfg.GOOS == "plan9"

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
// see runtime/mkduff.go. The constants below describe its layout.
const (
	dzBlocks    = 16 // number of MOV/LEAQ blocks
	dzBlockLen  = 4  // number of clears (MOVs) per block
	dzBlockSize = 23 // size in bytes of a single block's instructions
	dzMovSize   = 5  // size in bytes of a single MOV instruction
	dzLeaqSize  = 4  // size in bytes of a single LEAQ instruction
	dzClearStep = 16 // number of bytes cleared by each MOV instruction

	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block (64)
	dzSize     = dzBlocks * dzBlockSize   // total size of the routine body (368)
)

// dzOff returns the offset for a jump into DUFFZERO.
// b is the number of bytes to zero.
func dzOff(b int64) int64 {
	off := int64(dzSize)
	off -= b / dzClearLen * dzBlockSize
	tailLen := b % dzClearLen
	if tailLen >= dzClearStep {
		off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep)
	}
	return off
}
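
// For example, b = 80 (one full 64-byte block plus a 16-byte tail)
// gives off = 368 - 1*23 - (4 + 5*1) = 336: the code entered at that
// offset performs five 16-byte clears in total.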

// dzDI returns the pre-adjustment to DI for a jump into DUFFZERO.
// b is the number of bytes to zero.
func dzDI(b int64) int64 {
	tailLen := b % dzClearLen
	if tailLen < dzClearStep {
		return 0
	}
	tailSteps := tailLen / dzClearStep
	return -dzClearStep * (dzBlockLen - tailSteps)
}
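
// Continuing the example, b = 80 has tailLen = 16 and tailSteps = 1,
// so DI is biased by -16*(4-1) = -48 to compensate for entering the
// tail block with only its last MOV left to execute.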
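// zerorange zeroes cnt bytes of stack memory starting at off(SP),
// picking a strategy by size: a single MOVQ for 8 bytes, MOVUPS
// stores from the zeroed X15 register for up to 8*RegSize bytes,
// a jump into DUFFZERO for up to 128*RegSize bytes, and REP STOSQ
// beyond that. Plan 9 skips the SSE-based strategies. *state tracks
// whether R13 is known to hold zero, so repeated calls can avoid
// reloading it.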
func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
	const (
		r13 = 1 << iota // if R13 is already zeroed.
	)

	if cnt == 0 {
		return p
	}

	if cnt%int64(types.RegSize) != 0 {
		// cnt is a multiple of the pointer size but not of the
		// register size; zero one pointer-sized word by hand first.
		if cnt%int64(types.PtrSize) != 0 {
			base.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
		}
		if *state&r13 == 0 {
			p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
			*state |= r13
		}
		p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
		off += int64(types.PtrSize)
		cnt -= int64(types.PtrSize)
	}

	if cnt == 8 {
		if *state&r13 == 0 {
			p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
			*state |= r13
		}
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
	} else if !isPlan9 && cnt <= int64(8*types.RegSize) {
		// Clear 16 bytes at a time using MOVUPS from X15, which the
		// amd64 register ABI keeps zeroed.
		for i := int64(0); i < cnt/16; i++ {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
		}

		// If cnt is not a multiple of 16, clear the trailing bytes
		// with a final 16-byte store that overlaps the previous one.
		if cnt%16 != 0 {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16))
		}
	} else if !isPlan9 && (cnt <= int64(128*types.RegSize)) {
		// Save DI to R12. With the amd64 register ABI, DI can contain
		// an incoming parameter, whereas R12 is always scratch.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
		// Emit the DUFFZERO call.
		p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
		p.To.Sym = ir.Syms.Duffzero
		// Clear any remaining 8-byte tail with a 16-byte store that
		// overlaps the bytes DUFFZERO just cleared.
		if cnt%16 != 0 {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
		}
		// Restore DI from R12.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)

	} else {
		// At this point in the prologue we may have live values in
		// all of AX, DI, and CX, which under the register ABI can
		// carry incoming arguments. Save them to scratch registers
		// before the REP;STOSQ below, then restore them afterwards.
		// R12, R13, and R15 are available as scratch here. Because
		// R13 is clobbered, the r13-is-zero bit in *state must be
		// cleared at the end of this sequence.

		// Save AX, DI, and CX.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)

		// Set up the REP;STOSQ and kick it off: AX = 0 (the value to
		// store), CX = number of 8-byte words, DI = destination.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
		p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)

		// Restore AX, DI, and CX.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)

		// Record that R13 no longer holds zero.
		*state &= ^uint32(r13)
	}

	return p
}
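// ginsnop generates a single-byte hardware no-op.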
func ginsnop(pp *objw.Progs) *obj.Prog {
	// This is actually a hardware NOP: XCHGL AX, AX assembles to the
	// 1-byte 0x90 instruction. Unlike typical *L opcodes, it does not
	// zero the high 32 bits of the register.
	p := pp.Prog(x86.AXCHGL)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_AX
	p.To.Type = obj.TYPE_REG
	p.To.Reg = x86.REG_AX
	return p
}