Text file
src/math/big/arith_arm.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !math_big_pure_go
6 // +build !math_big_pure_go
7
8 #include "textflag.h"
9
10 // This file provides fast assembly versions for the elementary
11 // arithmetic operations on vectors implemented in arith.go.
12
13 // func addVV(z, x, y []Word) (c Word)
// addVV adds x and y word by word, propagating the carry from each word to
// the next, and stores len(z) result words into z. c is 1 if the last
// addition left the carry flag set and 0 otherwise (also 0 when len(z) == 0).
// NOTE(review): only z_len is read; x and y are presumably at least as long
// as z — confirm against the callers.
14 TEXT ·addVV(SB),NOSPLIT,$0
15 ADD.S $0, R0 // clear carry flag
16 MOVW z+0(FP), R1 // R1 = pointer to z[0]
17 MOVW z_len+4(FP), R4 // R4 = len(z)
18 MOVW x+12(FP), R2 // R2 = pointer to x[0]
19 MOVW y+24(FP), R3 // R3 = pointer to y[0]
20 ADD R4<<2, R1, R4 // R4 = R1 + 4*len(z): address just past the last word of z
21 B E1 // test before the first iteration so len(z) == 0 does no work
22 L1:
23 MOVW.P 4(R2), R5 // R5 = next word of x; x pointer advances by 4
24 MOVW.P 4(R3), R6 // R6 = next word of y; y pointer advances by 4
25 ADC.S R6, R5 // R5 += R6 + carry; the new carry stays in the flags
26 MOVW.P R5, 4(R1) // store the sum into z; z pointer advances by 4
27 E1:
28 TEQ R1, R4 // end of z reached? the carry flag has to survive this test (it feeds the next ADC.S or the MOVW.CS below)
29 BNE L1
30
31 MOVW $0, R0
32 MOVW.CS $1, R0 // R0 = 1 only if the carry flag is set
33 MOVW R0, c+36(FP) // return c
34 RET
35
36
37 // func subVV(z, x, y []Word) (c Word)
38 // (same as addVV except for SUB.S/SBC.S instead of ADD.S/ADC.S, the result being taken from carry-clear instead of carry-set, and label names)
// subVV subtracts y from x word by word, propagating the borrow from each
// word to the next, and stores len(z) result words into z. c is 1 if the
// carry flag is clear at the end (a borrow is pending) and 0 otherwise.
39 TEXT ·subVV(SB),NOSPLIT,$0
40 SUB.S $0, R0 // clear borrow flag
41 MOVW z+0(FP), R1 // R1 = pointer to z[0]
42 MOVW z_len+4(FP), R4 // R4 = len(z)
43 MOVW x+12(FP), R2 // R2 = pointer to x[0]
44 MOVW y+24(FP), R3 // R3 = pointer to y[0]
45 ADD R4<<2, R1, R4 // R4 = R1 + 4*len(z): address just past the last word of z
46 B E2 // test before the first iteration so len(z) == 0 does no work
47 L2:
48 MOVW.P 4(R2), R5 // R5 = next word of x; x pointer advances by 4
49 MOVW.P 4(R3), R6 // R6 = next word of y; y pointer advances by 4
50 SBC.S R6, R5 // R5 -= R6 with the incoming borrow; the new borrow state stays in the flags
51 MOVW.P R5, 4(R1) // store the difference into z; z pointer advances by 4
52 E2:
53 TEQ R1, R4 // end of z reached? the carry flag has to survive this test (it feeds the next SBC.S or the MOVW.CC below)
54 BNE L2
55
56 MOVW $0, R0
57 MOVW.CC $1, R0 // R0 = 1 only if the carry flag is clear, i.e. a borrow occurred
58 MOVW R0, c+36(FP) // return c
59 RET
60
61
62 // func addVW(z, x []Word, y Word) (c Word)
// addVW adds the single word y to the vector x, stores len(z) result words
// into z, and returns the final carry (0 or 1) in c. When len(z) == 0 it
// returns y itself as c.
63 TEXT ·addVW(SB),NOSPLIT,$0
64 MOVW z+0(FP), R1 // R1 = pointer to z[0]
65 MOVW z_len+4(FP), R4 // R4 = len(z)
66 MOVW x+12(FP), R2 // R2 = pointer to x[0]
67 MOVW y+24(FP), R3 // R3 = y
68 ADD R4<<2, R1, R4 // R4 = R1 + 4*len(z): address just past the last word of z
69 TEQ R1, R4 // is z empty?
70 BNE L3a
71 MOVW R3, c+28(FP) // empty z: y is returned unchanged as c
72 RET
73 L3a: // first word: plain add of y, which also establishes the carry flag
74 MOVW.P 4(R2), R5 // R5 = x[0]; x pointer advances by 4
75 ADD.S R3, R5 // R5 += y, setting the carry flag
76 MOVW.P R5, 4(R1) // z[0] = R5; z pointer advances by 4
77 B E3
78 L3: // remaining words: only the carry is added
79 MOVW.P 4(R2), R5 // R5 = next word of x
80 ADC.S $0, R5 // R5 += carry; the new carry stays in the flags
81 MOVW.P R5, 4(R1) // store into z; z pointer advances by 4
82 E3:
83 TEQ R1, R4 // end of z reached? the carry flag has to survive this test
84 BNE L3
85
86 MOVW $0, R0
87 MOVW.CS $1, R0 // R0 = 1 only if the carry flag is set
88 MOVW R0, c+28(FP) // return c
89 RET
90
91
92 // func subVW(z, x []Word, y Word) (c Word)
// subVW subtracts the single word y from the vector x, stores len(z) result
// words into z, and returns the final borrow (0 or 1) in c. When
// len(z) == 0 it returns y itself as c.
93 TEXT ·subVW(SB),NOSPLIT,$0
94 MOVW z+0(FP), R1 // R1 = pointer to z[0]
95 MOVW z_len+4(FP), R4 // R4 = len(z)
96 MOVW x+12(FP), R2 // R2 = pointer to x[0]
97 MOVW y+24(FP), R3 // R3 = y
98 ADD R4<<2, R1, R4 // R4 = R1 + 4*len(z): address just past the last word of z
99 TEQ R1, R4 // is z empty?
100 BNE L4a
101 MOVW R3, c+28(FP) // empty z: y is returned unchanged as c
102 RET
103 L4a: // first word: plain subtract of y, which also establishes the borrow state
104 MOVW.P 4(R2), R5 // R5 = x[0]; x pointer advances by 4
105 SUB.S R3, R5 // R5 -= y, setting the flags
106 MOVW.P R5, 4(R1) // z[0] = R5; z pointer advances by 4
107 B E4
108 L4: // remaining words: only the borrow is subtracted
109 MOVW.P 4(R2), R5 // R5 = next word of x
110 SBC.S $0, R5 // R5 -= incoming borrow; the new borrow state stays in the flags
111 MOVW.P R5, 4(R1) // store into z; z pointer advances by 4
112 E4:
113 TEQ R1, R4 // end of z reached? the carry flag has to survive this test
114 BNE L4
115
116 MOVW $0, R0
117 MOVW.CC $1, R0 // R0 = 1 only if the carry flag is clear, i.e. a borrow occurred
118 MOVW R0, c+28(FP) // return c
119 RET
120
121
122 // func shlVU(z, x []Word, s uint) (c Word)
// shlVU stores x shifted left by s bits into the len(z) words of z, working
// from the highest word down to z[0], and returns in c the bits shifted out
// of the highest word. It returns c = 0 when len(z) == 0 or s == 0 (for
// s == 0 the words are copied unchanged).
// NOTE(review): the s != 0 path uses 32-s as the complementary shift count,
// so it appears to expect 0 < s < 32 — confirm against callers.
123 TEXT ·shlVU(SB),NOSPLIT,$0
124 MOVW z_len+4(FP), R5 // R5 = len(z)
125 TEQ $0, R5
126 BEQ X7 // empty z: just return 0
127
128 MOVW z+0(FP), R1 // R1 = pointer to z[0]
129 MOVW x+12(FP), R2
130 ADD R5<<2, R2, R2 // R2 = pointer to x[0] + 4*len(z): just past the highest word read
131 ADD R5<<2, R1, R5 // R5 = R1 + 4*len(z): just past the highest word of z
132 MOVW s+24(FP), R3 // R3 = s
133 TEQ $0, R3 // shift 0 is special
134 BEQ Y7
135 ADD $4, R1 // stop one word early
136 MOVW $32, R4
137 SUB R3, R4 // R4 = 32 - s
138 MOVW $0, R7 // redundant: R7 is overwritten by MOVW R6<<R3, R7 below before it is read
139
140 MOVW.W -4(R2), R6 // step the x pointer back one word, then load the highest word
141 MOVW R6<<R3, R7 // R7 = pending result word; its low bits are merged in by the loop
142 MOVW R6>>R4, R6 // the top s bits, which are shifted out of the vector
143 MOVW R6, c+28(FP) // return them as c
144 B E7
145
146 L7:
147 MOVW.W -4(R2), R6 // step the x pointer back one word, then load that word
148 ORR R6>>R4, R7 // merge its top s bits into the pending word
149 MOVW.W R7, -4(R5) // step the z pointer back one word, then store the completed word
150 MOVW R6<<R3, R7 // start the next pending word
151 E7:
152 TEQ R1, R5 // loop until only z[0] is left to write (R1 was advanced by 4 above)
153 BNE L7
154
155 MOVW R7, -4(R5) // z[0] = last pending word; there is no lower word to merge in
156 RET
157
158 Y7: // copy loop, because shift 0 == shift 32
159 MOVW.W -4(R2), R6 // step the x pointer back one word, then load
160 MOVW.W R6, -4(R5) // step the z pointer back one word, then store
161 TEQ R1, R5 // here R1 still points at z[0]
162 BNE Y7
163
164 X7:
165 MOVW $0, R1
166 MOVW R1, c+28(FP) // return c = 0
167 RET
168
169
170 // func shrVU(z, x []Word, s uint) (c Word)
// shrVU stores x shifted right by s bits into the len(z) words of z, working
// from z[0] upward, and returns in c the bits shifted out of x[0], placed in
// the high end of the word. It returns c = 0 when len(z) == 0 or s == 0 (for
// s == 0 the words are copied unchanged).
// NOTE(review): the s != 0 path uses 32-s as the complementary shift count,
// so it appears to expect 0 < s < 32 — confirm against callers.
171 TEXT ·shrVU(SB),NOSPLIT,$0
172 MOVW z_len+4(FP), R5 // R5 = len(z)
173 TEQ $0, R5
174 BEQ X6 // empty z: just return 0
175
176 MOVW z+0(FP), R1 // R1 = pointer to z[0]
177 MOVW x+12(FP), R2 // R2 = pointer to x[0]
178 ADD R5<<2, R1, R5 // R5 = R1 + 4*len(z): just past the highest word of z
179 MOVW s+24(FP), R3 // R3 = s
180 TEQ $0, R3 // shift 0 is special
181 BEQ Y6
182 SUB $4, R5 // stop one word early
183 MOVW $32, R4
184 SUB R3, R4 // R4 = 32 - s
185 MOVW $0, R7 // redundant: R7 is overwritten by MOVW R6>>R3, R7 below before it is read
186
187 // first word
188 MOVW.P 4(R2), R6 // R6 = x[0]; x pointer advances by 4
189 MOVW R6>>R3, R7 // R7 = pending result word; its high bits are merged in by the loop
190 MOVW R6<<R4, R6 // the low s bits of x[0], moved to the top of the word
191 MOVW R6, c+28(FP) // return them as c
192 B E6
193
194 // word loop
195 L6:
196 MOVW.P 4(R2), R6 // R6 = next word of x; x pointer advances by 4
197 ORR R6<<R4, R7 // merge its low s bits into the top of the pending word
198 MOVW.P R7, 4(R1) // store the completed word; z pointer advances by 4
199 MOVW R6>>R3, R7 // start the next pending word
200 E6:
201 TEQ R1, R5 // loop until only the highest word of z is left to write (R5 was reduced by 4 above)
202 BNE L6
203
204 MOVW R7, 0(R1) // highest word of z = last pending word; there is no higher word to merge in
205 RET
206
207 Y6: // copy loop, because shift 0 == shift 32
208 MOVW.P 4(R2), R6 // load a word of x; x pointer advances by 4
209 MOVW.P R6, 4(R1) // store it into z; z pointer advances by 4
210 TEQ R1, R5 // here R5 is still the address just past the end of z
211 BNE Y6
212
213 X6:
214 MOVW $0, R1
215 MOVW R1, c+28(FP) // return c = 0
216 RET
217
218
219 // func mulAddVWW(z, x []Word, y, r Word) (c Word)
// mulAddVWW multiplies each word of x by y, adds the running carry (which
// starts as r), stores the low word of each result into z for len(z) words,
// and returns the final carry word in c. When len(z) == 0 it returns r.
220 TEXT ·mulAddVWW(SB),NOSPLIT,$0
221 MOVW $0, R0 // R0 stays 0; it is the addend for the carry-only ADC below
222 MOVW z+0(FP), R1 // R1 = pointer to z[0]
223 MOVW z_len+4(FP), R5 // R5 = len(z)
224 MOVW x+12(FP), R2 // R2 = pointer to x[0]
225 MOVW y+24(FP), R3 // R3 = y
226 MOVW r+28(FP), R4 // R4 = running carry word, initially r
227 ADD R5<<2, R1, R5 // R5 = R1 + 4*len(z): address just past the last word of z
228 B E8 // test before the first iteration so len(z) == 0 does no work
229
230 // word loop
231 L8:
232 MOVW.P 4(R2), R6 // R6 = next word of x; x pointer advances by 4
233 MULLU R6, R3, (R7, R6) // 64-bit product of R6 and y: high word in R7, low word in R6
234 ADD.S R4, R6 // low word += running carry, setting the carry flag
235 ADC R0, R7 // high word += carry flag
236 MOVW.P R6, 4(R1) // store the low word into z; z pointer advances by 4
237 MOVW R7, R4 // the high word becomes the running carry
238 E8:
239 TEQ R1, R5 // end of z reached?
240 BNE L8
241
242 MOVW R4, c+32(FP) // return the final carry word
243 RET
244
245
246 // func addMulVVW(z, x []Word, y Word) (c Word)
// addMulVVW adds x*y into z in place: for each of the len(z) words it
// computes x[i]*y plus the running carry (starting at 0) plus the current
// z[i], stores the low word back into z[i], and carries the high word
// forward. The final carry word is returned in c.
247 TEXT ·addMulVVW(SB),NOSPLIT,$0
248 MOVW $0, R0 // R0 stays 0; it is the addend for the carry-only ADCs below
249 MOVW z+0(FP), R1 // R1 = pointer to z[0]
250 MOVW z_len+4(FP), R5 // R5 = len(z)
251 MOVW x+12(FP), R2 // R2 = pointer to x[0]
252 MOVW y+24(FP), R3 // R3 = y
253 ADD R5<<2, R1, R5 // R5 = R1 + 4*len(z): address just past the last word of z
254 MOVW $0, R4 // R4 = running carry word, initially 0
255 B E9 // test before the first iteration so len(z) == 0 does no work
256
257 // word loop
258 L9:
259 MOVW.P 4(R2), R6 // R6 = next word of x; x pointer advances by 4
260 MULLU R6, R3, (R7, R6) // 64-bit product of R6 and y: high word in R7, low word in R6
261 ADD.S R4, R6 // low word += running carry, setting the carry flag
262 ADC R0, R7 // high word += carry flag
263 MOVW 0(R1), R4 // R4 = current word of z (the old carry has already been added)
264 ADD.S R4, R6 // low word += that z word, setting the carry flag
265 ADC R0, R7 // high word += carry flag
266 MOVW.P R6, 4(R1) // store the low word back into z; z pointer advances by 4
267 MOVW R7, R4 // the high word becomes the running carry
268 E9:
269 TEQ R1, R5 // end of z reached?
270 BNE L9
271
272 MOVW R4, c+28(FP) // return the final carry word
273 RET
274
275
276
277 // func mulWW(x, y Word) (z1, z0 Word)
// mulWW returns the double-word unsigned product of x and y: z1 is the high
// word and z0 is the low word.
278 TEXT ·mulWW(SB),NOSPLIT,$0
279 MOVW x+0(FP), R1 // R1 = x
280 MOVW y+4(FP), R2 // R2 = y
281 MULLU R1, R2, (R4, R3) // 64-bit product: high word in R4, low word in R3
282 MOVW R4, z1+8(FP) // z1 = high word
283 MOVW R3, z0+12(FP) // z0 = low word
284 RET
285
View as plain text