Text file
src/math/big/arith_386.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !math_big_pure_go
6 // +build !math_big_pure_go
7
8 #include "textflag.h"
9
10 // This file provides fast assembly versions for the elementary
11 // arithmetic operations on vectors implemented in arith.go.
12
13 // func mulWW(x, y Word) (z1, z0 Word)
//
// mulWW returns the full double-word product of x and y: z1 receives the
// high 32 bits and z0 the low 32 bits of x*y.
// Arguments and results are accessed at fixed offsets from FP; the frame
// size is declared as $0, so no local stack space is used.
14 TEXT ·mulWW(SB),NOSPLIT,$0
15 MOVL x+0(FP), AX // AX = x
16 MULL y+4(FP) // DX:AX = AX * y (unsigned 32x32 -> 64-bit product)
17 MOVL DX, z1+8(FP) // z1 = high word of the product
18 MOVL AX, z0+12(FP) // z0 = low word of the product
19 RET
20
21
22 // func addVV(z, x, y []Word) (c Word)
//
// addVV computes z[i] = x[i] + y[i] + carry for i = 0 .. len(z)-1, starting
// at index 0, and returns the final carry in c (0 or 1).
// Only z_len is read; x and y are indexed over the same range without a
// separate length check.
//
// Register roles: DI = &z[0], SI = &x[0], CX = &y[0], BP = n = len(z),
// BX = i, DX = saved carry (0 or -1 while inside the loop), AX = scratch.
23 TEXT ·addVV(SB),NOSPLIT,$0
24 MOVL z+0(FP), DI
25 MOVL x+12(FP), SI
26 MOVL y+24(FP), CX
27 MOVL z_len+4(FP), BP
28 MOVL $0, BX // i = 0
29 MOVL $0, DX // c = 0
30 JMP E1 // check i < n before the first iteration
31
32 L1: MOVL (SI)(BX*4), AX // AX = x[i]
33 ADDL DX, DX // restore CF: DX is 0 or -1, so DX+DX carries out iff a carry was saved
34 ADCL (CX)(BX*4), AX // AX = x[i] + y[i] + CF
35 SBBL DX, DX // save CF as DX = -CF; the ADDL/CMPL below overwrite the flags
36 MOVL AX, (DI)(BX*4) // z[i] = AX
37 ADDL $1, BX // i++
38
39 E1: CMPL BX, BP // i < n
40 JL L1 // signed compare: n <= 0 skips the loop entirely
41
42 NEGL DX // convert the saved -CF (0 or -1) into c = 0 or 1
43 MOVL DX, c+36(FP)
44 RET
45
46
47 // func subVV(z, x, y []Word) (c Word)
48 // (same as addVV except for SBBL instead of ADCL and label names)
//
// subVV computes z[i] = x[i] - y[i] - borrow for i = 0 .. len(z)-1, starting
// at index 0, and returns the final borrow in c (0 or 1).
// Only z_len is read; x and y are indexed over the same range without a
// separate length check.
//
// Register roles: DI = &z[0], SI = &x[0], CX = &y[0], BP = n = len(z),
// BX = i, DX = saved borrow (0 or -1 while inside the loop), AX = scratch.
49 TEXT ·subVV(SB),NOSPLIT,$0
50 MOVL z+0(FP), DI
51 MOVL x+12(FP), SI
52 MOVL y+24(FP), CX
53 MOVL z_len+4(FP), BP
54 MOVL $0, BX // i = 0
55 MOVL $0, DX // c = 0
56 JMP E2 // check i < n before the first iteration
57
58 L2: MOVL (SI)(BX*4), AX // AX = x[i]
59 ADDL DX, DX // restore CF: DX is 0 or -1, so DX+DX carries out iff a borrow was saved
60 SBBL (CX)(BX*4), AX // AX = x[i] - y[i] - CF
61 SBBL DX, DX // save CF as DX = -CF; the ADDL/CMPL below overwrite the flags
62 MOVL AX, (DI)(BX*4) // z[i] = AX
63 ADDL $1, BX // i++
64
65 E2: CMPL BX, BP // i < n
66 JL L2 // signed compare: n <= 0 skips the loop entirely
67
68 NEGL DX // convert the saved -CF (0 or -1) into c = 0 or 1
69 MOVL DX, c+36(FP)
70 RET
71
72
73 // func addVW(z, x []Word, y Word) (c Word)
//
// addVW adds the single word y to the vector x and stores the result in z:
// the running carry c starts as y, and for i = 0 .. len(z)-1 the loop sets
// z[i] = x[i] + c and then c = carry out of that addition (0 or 1).
// The final c is returned. If the loop does not run (n <= 0), c is
// returned as y unchanged.
//
// Register roles: DI = &z[0], SI = &x[0], BP = n = len(z), BX = i,
// AX = c on loop entry and the sum word inside the loop body.
74 TEXT ·addVW(SB),NOSPLIT,$0
75 MOVL z+0(FP), DI
76 MOVL x+12(FP), SI
77 MOVL y+24(FP), AX // c = y
78 MOVL z_len+4(FP), BP
79 MOVL $0, BX // i = 0
80 JMP E3 // check i < n before the first iteration
81
82 L3: ADDL (SI)(BX*4), AX // AX = x[i] + c, CF = carry out
83 MOVL AX, (DI)(BX*4) // z[i] = AX (MOVL does not modify CF)
84 SBBL AX, AX // save CF
85 NEGL AX // c = CF as 0 or 1
86 ADDL $1, BX // i++
87
88 E3: CMPL BX, BP // i < n
89 JL L3
90
91 MOVL AX, c+28(FP)
92 RET
93
94
95 // func subVW(z, x []Word, y Word) (c Word)
//
// subVW subtracts the single word y from the vector x and stores the result
// in z: the running borrow c starts as y, and for i = 0 .. len(z)-1 the loop
// sets z[i] = x[i] - c and then c = borrow out of that subtraction (0 or 1).
// The final c is returned. If the loop does not run (n <= 0), c is
// returned as y unchanged.
//
// Register roles: DI = &z[0], SI = &x[0], BP = n = len(z), BX = i,
// AX = c, DX = difference word.
96 TEXT ·subVW(SB),NOSPLIT,$0
97 MOVL z+0(FP), DI
98 MOVL x+12(FP), SI
99 MOVL y+24(FP), AX // c = y
100 MOVL z_len+4(FP), BP
101 MOVL $0, BX // i = 0
102 JMP E4 // check i < n before the first iteration
103
104 L4: MOVL (SI)(BX*4), DX // DX = x[i]
105 SUBL AX, DX // DX = x[i] - c, CF = borrow out
106 MOVL DX, (DI)(BX*4) // z[i] = DX (MOVL does not modify CF)
107 SBBL AX, AX // save CF
108 NEGL AX // c = CF as 0 or 1
109 ADDL $1, BX // i++
110
111 E4: CMPL BX, BP // i < n
112 JL L4
113
114 MOVL AX, c+28(FP)
115 RET
116
117
118 // func shlVU(z, x []Word, s uint) (c Word)
//
// shlVU shifts the vector x left by s bits and stores the result in z,
// walking from the highest index down to 0:
//   c    = x[n-1] >> ŝ              (the bits shifted out of the top word)
//   z[i] = x[i]<<s | x[i-1]>>ŝ      for i = n-1 .. 1
//   z[0] = x[0] << s
// where n = len(z) and ŝ denotes the complementary shift count 32 - s.
// For n <= 0 nothing is stored and c = 0.
// NOTE(review): the ŝ formulas presume 0 < s < 32; with s == 0 the
// three-operand shifts leave DX unchanged. Confirm the range of s with callers.
//
// The three-operand form "SHLL CX, AX, DX" shifts DX left by CX bits and
// fills the vacated low bits from the high bits of AX.
//
// Register roles: DI = &z[0], SI = &x[0], CX = s, BX = i,
// AX = w1 (the lower neighbour word), DX = w (the word being assembled).
119 TEXT ·shlVU(SB),NOSPLIT,$0
120 MOVL z_len+4(FP), BX // i = n = len(z)
121 SUBL $1, BX // i--
122 JL X8b // i < 0 (n <= 0)
123
124 // n > 0
125 MOVL z+0(FP), DI
126 MOVL x+12(FP), SI
127 MOVL s+24(FP), CX
128 MOVL (SI)(BX*4), AX // w1 = x[n-1]
129 MOVL $0, DX // start from zero so only the bits shifted in from w1 remain
130 SHLL CX, AX, DX // w1>>ŝ
131 MOVL DX, c+28(FP) // c = bits shifted out of x[n-1]
132
133 CMPL BX, $0
134 JLE X8a // i <= 0
135
136 // i > 0
137 L8: MOVL AX, DX // w = w1
138 MOVL -4(SI)(BX*4), AX // w1 = x[i-1]
139 SHLL CX, AX, DX // w<<s | w1>>ŝ
140 MOVL DX, (DI)(BX*4) // z[i] = w<<s | w1>>ŝ
141 SUBL $1, BX // i--
142 JG L8 // i > 0
143
144 // i <= 0
145 X8a: SHLL CX, AX // w1<<s
146 MOVL AX, (DI) // z[0] = w1<<s
147 RET
148
149 X8b: MOVL $0, c+28(FP) // empty vector: nothing shifted out
150 RET
151
152
153 // func shrVU(z, x []Word, s uint) (c Word)
//
// shrVU shifts the vector x right by s bits and stores the result in z,
// walking from index 0 upward:
//   c      = x[0] << ŝ              (the bits shifted out of the bottom word)
//   z[i]   = x[i]>>s | x[i+1]<<ŝ    for i = 0 .. n-2
//   z[n-1] = x[n-1] >> s
// where n = len(z) and ŝ denotes the complementary shift count 32 - s.
// For n <= 0 nothing is stored and c = 0.
// NOTE(review): the ŝ formulas presume 0 < s < 32; with s == 0 the
// three-operand shifts leave DX unchanged. Confirm the range of s with callers.
//
// The three-operand form "SHRL CX, AX, DX" shifts DX right by CX bits and
// fills the vacated high bits from the low bits of AX.
//
// Register roles: DI = &z[0], SI = &x[0], CX = s, BP = n-1, BX = i,
// AX = w1 (the upper neighbour word), DX = w (the word being assembled).
154 TEXT ·shrVU(SB),NOSPLIT,$0
155 MOVL z_len+4(FP), BP
156 SUBL $1, BP // n--
157 JL X9b // n < 0 (n <= 0)
158
159 // n > 0
160 MOVL z+0(FP), DI
161 MOVL x+12(FP), SI
162 MOVL s+24(FP), CX
163 MOVL (SI), AX // w1 = x[0]
164 MOVL $0, DX // start from zero so only the bits shifted in from w1 remain
165 SHRL CX, AX, DX // w1<<ŝ
166 MOVL DX, c+28(FP) // c = bits shifted out of x[0]
167
168 MOVL $0, BX // i = 0
169 JMP E9 // check i < n-1 before the first iteration
170
171 // i < n-1
172 L9: MOVL AX, DX // w = w1
173 MOVL 4(SI)(BX*4), AX // w1 = x[i+1]
174 SHRL CX, AX, DX // w>>s | w1<<ŝ
175 MOVL DX, (DI)(BX*4) // z[i] = w>>s | w1<<ŝ
176 ADDL $1, BX // i++
177
178 E9: CMPL BX, BP
179 JL L9 // i < n-1
180
181 // i >= n-1
// X9a is reached only by falling through from the loop test above.
182 X9a: SHRL CX, AX // w1>>s
183 MOVL AX, (DI)(BP*4) // z[n-1] = w1>>s
184 RET
185
186 X9b: MOVL $0, c+28(FP) // empty vector: nothing shifted out
187 RET
188
189
190 // func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// mulAddVWW computes z = x*y + r for a vector x and single words y and r.
// The carry c starts as r; for each i = 0 .. len(z)-1 the 64-bit value
// x[i]*y + c is formed, its low word is stored in z[i], and its high word
// becomes the next c. The final c is returned.
//
// DI and SI are advanced to one past the last element of z and x, and the
// index BX runs from -n up to 0, so the loop ends when BX reaches zero.
//
// Register roles: DI = &z[n], SI = &x[n], BP = y, CX = c, BX = i - n,
// DX:AX = current 64-bit product/sum.
191 TEXT ·mulAddVWW(SB),NOSPLIT,$0
192 MOVL z+0(FP), DI
193 MOVL x+12(FP), SI
194 MOVL y+24(FP), BP
195 MOVL r+28(FP), CX // c = r
196 MOVL z_len+4(FP), BX
197 LEAL (DI)(BX*4), DI // DI = &z[n]
198 LEAL (SI)(BX*4), SI // SI = &x[n]
199 NEGL BX // i = -n
200 JMP E5 // check i < 0 before the first iteration
201
202 L5: MOVL (SI)(BX*4), AX // AX = x[n+i]
203 MULL BP // DX:AX = x[n+i] * y
204 ADDL CX, AX // low word += c
205 ADCL $0, DX // propagate the carry into the high word
206 MOVL AX, (DI)(BX*4) // z[n+i] = low word
207 MOVL DX, CX // c = high word
208 ADDL $1, BX // i++
209
210 E5: CMPL BX, $0 // i < 0
211 JL L5
212
213 MOVL CX, c+32(FP)
214 RET
215
216
217 // func addMulVVW(z, x []Word, y Word) (c Word)
//
// addMulVVW computes z += x*y for a vector x and a single word y.
// The carry c starts at 0; for each i = 0 .. len(z)-1 the 64-bit value
// x[i]*y + c is formed, its low word is added into z[i] in memory, and its
// high word plus the carry out of that addition becomes the next c.
// The final c is returned.
//
// DI and SI are advanced to one past the last element of z and x, and the
// index BX runs from -n up to 0, so the loop ends when BX reaches zero.
//
// Register roles: DI = &z[n], SI = &x[n], BP = y, CX = c, BX = i - n,
// DX:AX = current 64-bit product/sum.
218 TEXT ·addMulVVW(SB),NOSPLIT,$0
219 MOVL z+0(FP), DI
220 MOVL x+12(FP), SI
221 MOVL y+24(FP), BP
222 MOVL z_len+4(FP), BX
223 LEAL (DI)(BX*4), DI // DI = &z[n]
224 LEAL (SI)(BX*4), SI // SI = &x[n]
225 NEGL BX // i = -n
226 MOVL $0, CX // c = 0
227 JMP E6 // check i < 0 before the first iteration
228
229 L6: MOVL (SI)(BX*4), AX // AX = x[n+i]
230 MULL BP // DX:AX = x[n+i] * y
231 ADDL CX, AX // low word += c
232 ADCL $0, DX // propagate that carry into the high word
233 ADDL AX, (DI)(BX*4) // z[n+i] += low word
234 ADCL $0, DX // propagate the carry of the memory add into the high word
235 MOVL DX, CX // c = high word
236 ADDL $1, BX // i++
237
238 E6: CMPL BX, $0 // i < 0
239 JL L6
240
241 MOVL CX, c+28(FP)
242 RET
243
244
245
246
View as plain text