Text file
src/crypto/md5/md5block_arm.s
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 //
5 // ARM version of md5block.go
6
7 #include "textflag.h"
8
9 // Register definitions: symbolic names for the ARM registers used by block
10 #define Rtable R0 // Pointer to MD5 constants table (also the memmove "to" argument)
11 #define Rdata R1 // Pointer to data to hash
12 #define Ra R2 // MD5 accumulator
13 #define Rb R3 // MD5 accumulator
14 #define Rc R4 // MD5 accumulator
15 #define Rd R5 // MD5 accumulator
16 #define Rc0 R6 // MD5 constant (also scratch: memmove length, pointer to dig)
17 #define Rc1 R7 // MD5 constant
18 #define Rc2 R8 // MD5 constant
19 // r9, r10 are forbidden (presumably reserved by the Go ARM toolchain/runtime - confirm)
20 // r11 is OK provided you check the assembler that no synthetic instructions use it
21 #define Rc3 R11 // MD5 constant
22 #define Rt0 R12 // temporary
23 #define Rt1 R14 // temporary (NOTE(review): R14 is the ARM link register; assumes the return address is saved in the frame - confirm)
24
25 // func block(dig *digest, p []byte)
26 // 0(FP) is *digest
27 // 4(FP) is p.array (struct Slice)
28 // 8(FP) is p.len
29 //12(FP) is p.cap
30 // Folds each 64-byte chunk of p into the four 32-bit words at the start of *dig.
31 // Stack frame
32 #define p_end end-4(SP) // pointer to the end of data
33 #define p_data data-8(SP) // current data pointer
34 #define buf buffer-(8+4*16)(SP) // 16-word (64-byte) temporary buffer
35 // 3 words at 4..12(R13) hold the outgoing arguments for the memmove call
36 // Frame size $84 = 4 (p_end) + 4 (p_data) + 64 (buf) + 12 (call arguments); $16 is the size of the arguments listed above.
37 TEXT ·block(SB), NOSPLIT, $84-16
38 MOVW p+4(FP), Rdata // pointer to the data
39 MOVW p_len+8(FP), Rt0 // number of bytes
40 ADD Rdata, Rt0
41 MOVW Rt0, p_end // pointer to end of data
42 // NOTE(review): p_end is only tested after a chunk has been processed, so p.len is assumed to be a non-zero multiple of 64 - confirm callers guarantee this.
43 loop:
44 MOVW Rdata, p_data // Save Rdata: the unaligned path below overwrites it
45 AND.S $3, Rdata, Rt0 // Rt0 = Rdata & 3, setting flags (TST $3, Rdata not working, see issue 5921)
46 BEQ aligned // aligned detected - skip copy
47
48 // Copy the unaligned source data into the aligned temporary buffer
49 // memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
50 MOVW $buf, Rtable // to
51 MOVW $64, Rc0 // n
52 MOVM.IB [Rtable,Rdata,Rc0], (R13) // store to, from, n at 4(R13), 8(R13), 12(R13)
53 BL runtime·memmove(SB)
54
55 // Point to the local aligned copy of the data
56 MOVW $buf, Rdata
57
58 aligned:
59 // Point to the table of constants
60 // A PC relative add would be cheaper than this
61 MOVW $·table(SB), Rtable
62
63 // Load up initial MD5 accumulator
64 MOVW dig+0(FP), Rc0 // Rc0 = dig
65 MOVM.IA (Rc0), [Ra,Rb,Rc,Rd] // Ra..Rd = the four words at dig
66
67 // a += (((c^d)&b)^d) + X[index] + const
68 // a = (a<<shift | a>>(32-shift)) + b
69 #define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
70 EOR Rc, Rd, Rt0 ; \
71 AND Rb, Rt0 ; \
72 EOR Rd, Rt0 ; \
73 MOVW (index<<2)(Rdata), Rt1 ; \
74 ADD Rt1, Rt0 ; \
75 ADD Rconst, Rt0 ; \
76 ADD Rt0, Ra ; \
77 ADD Ra@>(32-shift), Rb, Ra ;
78 // Each MOVM.IA.W below loads the next four table constants into Rc0..Rc3 and advances Rtable past them.
79 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
80 ROUND1(Ra, Rb, Rc, Rd, 0, 7, Rc0)
81 ROUND1(Rd, Ra, Rb, Rc, 1, 12, Rc1)
82 ROUND1(Rc, Rd, Ra, Rb, 2, 17, Rc2)
83 ROUND1(Rb, Rc, Rd, Ra, 3, 22, Rc3)
84
85 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
86 ROUND1(Ra, Rb, Rc, Rd, 4, 7, Rc0)
87 ROUND1(Rd, Ra, Rb, Rc, 5, 12, Rc1)
88 ROUND1(Rc, Rd, Ra, Rb, 6, 17, Rc2)
89 ROUND1(Rb, Rc, Rd, Ra, 7, 22, Rc3)
90
91 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
92 ROUND1(Ra, Rb, Rc, Rd, 8, 7, Rc0)
93 ROUND1(Rd, Ra, Rb, Rc, 9, 12, Rc1)
94 ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
95 ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
96
97 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
98 ROUND1(Ra, Rb, Rc, Rd, 12, 7, Rc0)
99 ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
100 ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
101 ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
102
103 // a += (((b^c)&d)^c) + X[index] + const
104 // a = (a<<shift | a>>(32-shift)) + b
105 #define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
106 EOR Rb, Rc, Rt0 ; \
107 AND Rd, Rt0 ; \
108 EOR Rc, Rt0 ; \
109 MOVW (index<<2)(Rdata), Rt1 ; \
110 ADD Rt1, Rt0 ; \
111 ADD Rconst, Rt0 ; \
112 ADD Rt0, Ra ; \
113 ADD Ra@>(32-shift), Rb, Ra ;
114
115 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
116 ROUND2(Ra, Rb, Rc, Rd, 1, 5, Rc0)
117 ROUND2(Rd, Ra, Rb, Rc, 6, 9, Rc1)
118 ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
119 ROUND2(Rb, Rc, Rd, Ra, 0, 20, Rc3)
120
121 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
122 ROUND2(Ra, Rb, Rc, Rd, 5, 5, Rc0)
123 ROUND2(Rd, Ra, Rb, Rc, 10, 9, Rc1)
124 ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
125 ROUND2(Rb, Rc, Rd, Ra, 4, 20, Rc3)
126
127 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
128 ROUND2(Ra, Rb, Rc, Rd, 9, 5, Rc0)
129 ROUND2(Rd, Ra, Rb, Rc, 14, 9, Rc1)
130 ROUND2(Rc, Rd, Ra, Rb, 3, 14, Rc2)
131 ROUND2(Rb, Rc, Rd, Ra, 8, 20, Rc3)
132
133 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
134 ROUND2(Ra, Rb, Rc, Rd, 13, 5, Rc0)
135 ROUND2(Rd, Ra, Rb, Rc, 2, 9, Rc1)
136 ROUND2(Rc, Rd, Ra, Rb, 7, 14, Rc2)
137 ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
138
139 // a += (b^c^d) + X[index] + const
140 // a = (a<<shift | a>>(32-shift)) + b
141 #define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
142 EOR Rb, Rc, Rt0 ; \
143 EOR Rd, Rt0 ; \
144 MOVW (index<<2)(Rdata), Rt1 ; \
145 ADD Rt1, Rt0 ; \
146 ADD Rconst, Rt0 ; \
147 ADD Rt0, Ra ; \
148 ADD Ra@>(32-shift), Rb, Ra ;
149
150 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
151 ROUND3(Ra, Rb, Rc, Rd, 5, 4, Rc0)
152 ROUND3(Rd, Ra, Rb, Rc, 8, 11, Rc1)
153 ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
154 ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
155
156 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
157 ROUND3(Ra, Rb, Rc, Rd, 1, 4, Rc0)
158 ROUND3(Rd, Ra, Rb, Rc, 4, 11, Rc1)
159 ROUND3(Rc, Rd, Ra, Rb, 7, 16, Rc2)
160 ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
161
162 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
163 ROUND3(Ra, Rb, Rc, Rd, 13, 4, Rc0)
164 ROUND3(Rd, Ra, Rb, Rc, 0, 11, Rc1)
165 ROUND3(Rc, Rd, Ra, Rb, 3, 16, Rc2)
166 ROUND3(Rb, Rc, Rd, Ra, 6, 23, Rc3)
167
168 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
169 ROUND3(Ra, Rb, Rc, Rd, 9, 4, Rc0)
170 ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
171 ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
172 ROUND3(Rb, Rc, Rd, Ra, 2, 23, Rc3)
173
174 // a += (c^(b|^d)) + X[index] + const
175 // a = (a<<shift | a>>(32-shift)) + b
176 #define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
177 MVN Rd, Rt0 ; \
178 ORR Rb, Rt0 ; \
179 EOR Rc, Rt0 ; \
180 MOVW (index<<2)(Rdata), Rt1 ; \
181 ADD Rt1, Rt0 ; \
182 ADD Rconst, Rt0 ; \
183 ADD Rt0, Ra ; \
184 ADD Ra@>(32-shift), Rb, Ra ;
185
186 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
187 ROUND4(Ra, Rb, Rc, Rd, 0, 6, Rc0)
188 ROUND4(Rd, Ra, Rb, Rc, 7, 10, Rc1)
189 ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
190 ROUND4(Rb, Rc, Rd, Ra, 5, 21, Rc3)
191
192 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
193 ROUND4(Ra, Rb, Rc, Rd, 12, 6, Rc0)
194 ROUND4(Rd, Ra, Rb, Rc, 3, 10, Rc1)
195 ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
196 ROUND4(Rb, Rc, Rd, Ra, 1, 21, Rc3)
197
198 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
199 ROUND4(Ra, Rb, Rc, Rd, 8, 6, Rc0)
200 ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
201 ROUND4(Rc, Rd, Ra, Rb, 6, 15, Rc2)
202 ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
203
204 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
205 ROUND4(Ra, Rb, Rc, Rd, 4, 6, Rc0)
206 ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
207 ROUND4(Rc, Rd, Ra, Rb, 2, 15, Rc2)
208 ROUND4(Rb, Rc, Rd, Ra, 9, 21, Rc3)
209
210 MOVW dig+0(FP), Rt0 // Rt0 = dig
211 MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3] // reload the four words that were in dig before this chunk
212
213 ADD Rc0, Ra // add the previous words into the new accumulators
214 ADD Rc1, Rb
215 ADD Rc2, Rc
216 ADD Rc3, Rd
217
218 MOVM.IA [Ra,Rb,Rc,Rd], (Rt0) // store the updated words back to dig
219
220 MOVW p_data, Rdata // restore the pointer into p (Rdata may point at buf)
221 MOVW p_end, Rt0
222 ADD $64, Rdata // advance to the next 64-byte chunk
223 CMP Rt0, Rdata
224 BLO loop // repeat while Rdata < p_end (unsigned)
225
226 RET
227
228 // MD5 constants table: 64 32-bit words (256 bytes), 16 per round, stored in the
229 // order the rounds consume them. block loads them four at a time through Rtable.
230 // Round 1
231 DATA ·table+0x00(SB)/4, $0xd76aa478
232 DATA ·table+0x04(SB)/4, $0xe8c7b756
233 DATA ·table+0x08(SB)/4, $0x242070db
234 DATA ·table+0x0c(SB)/4, $0xc1bdceee
235 DATA ·table+0x10(SB)/4, $0xf57c0faf
236 DATA ·table+0x14(SB)/4, $0x4787c62a
237 DATA ·table+0x18(SB)/4, $0xa8304613
238 DATA ·table+0x1c(SB)/4, $0xfd469501
239 DATA ·table+0x20(SB)/4, $0x698098d8
240 DATA ·table+0x24(SB)/4, $0x8b44f7af
241 DATA ·table+0x28(SB)/4, $0xffff5bb1
242 DATA ·table+0x2c(SB)/4, $0x895cd7be
243 DATA ·table+0x30(SB)/4, $0x6b901122
244 DATA ·table+0x34(SB)/4, $0xfd987193
245 DATA ·table+0x38(SB)/4, $0xa679438e
246 DATA ·table+0x3c(SB)/4, $0x49b40821
247 // Round 2
248 DATA ·table+0x40(SB)/4, $0xf61e2562
249 DATA ·table+0x44(SB)/4, $0xc040b340
250 DATA ·table+0x48(SB)/4, $0x265e5a51
251 DATA ·table+0x4c(SB)/4, $0xe9b6c7aa
252 DATA ·table+0x50(SB)/4, $0xd62f105d
253 DATA ·table+0x54(SB)/4, $0x02441453
254 DATA ·table+0x58(SB)/4, $0xd8a1e681
255 DATA ·table+0x5c(SB)/4, $0xe7d3fbc8
256 DATA ·table+0x60(SB)/4, $0x21e1cde6
257 DATA ·table+0x64(SB)/4, $0xc33707d6
258 DATA ·table+0x68(SB)/4, $0xf4d50d87
259 DATA ·table+0x6c(SB)/4, $0x455a14ed
260 DATA ·table+0x70(SB)/4, $0xa9e3e905
261 DATA ·table+0x74(SB)/4, $0xfcefa3f8
262 DATA ·table+0x78(SB)/4, $0x676f02d9
263 DATA ·table+0x7c(SB)/4, $0x8d2a4c8a
264 // Round 3
265 DATA ·table+0x80(SB)/4, $0xfffa3942
266 DATA ·table+0x84(SB)/4, $0x8771f681
267 DATA ·table+0x88(SB)/4, $0x6d9d6122
268 DATA ·table+0x8c(SB)/4, $0xfde5380c
269 DATA ·table+0x90(SB)/4, $0xa4beea44
270 DATA ·table+0x94(SB)/4, $0x4bdecfa9
271 DATA ·table+0x98(SB)/4, $0xf6bb4b60
272 DATA ·table+0x9c(SB)/4, $0xbebfbc70
273 DATA ·table+0xa0(SB)/4, $0x289b7ec6
274 DATA ·table+0xa4(SB)/4, $0xeaa127fa
275 DATA ·table+0xa8(SB)/4, $0xd4ef3085
276 DATA ·table+0xac(SB)/4, $0x04881d05
277 DATA ·table+0xb0(SB)/4, $0xd9d4d039
278 DATA ·table+0xb4(SB)/4, $0xe6db99e5
279 DATA ·table+0xb8(SB)/4, $0x1fa27cf8
280 DATA ·table+0xbc(SB)/4, $0xc4ac5665
281 // Round 4
282 DATA ·table+0xc0(SB)/4, $0xf4292244
283 DATA ·table+0xc4(SB)/4, $0x432aff97
284 DATA ·table+0xc8(SB)/4, $0xab9423a7
285 DATA ·table+0xcc(SB)/4, $0xfc93a039
286 DATA ·table+0xd0(SB)/4, $0x655b59c3
287 DATA ·table+0xd4(SB)/4, $0x8f0ccc92
288 DATA ·table+0xd8(SB)/4, $0xffeff47d
289 DATA ·table+0xdc(SB)/4, $0x85845dd1
290 DATA ·table+0xe0(SB)/4, $0x6fa87e4f
291 DATA ·table+0xe4(SB)/4, $0xfe2ce6e0
292 DATA ·table+0xe8(SB)/4, $0xa3014314
293 DATA ·table+0xec(SB)/4, $0x4e0811a1
294 DATA ·table+0xf0(SB)/4, $0xf7537e82
295 DATA ·table+0xf4(SB)/4, $0xbd3af235
296 DATA ·table+0xf8(SB)/4, $0x2ad7d2bb
297 DATA ·table+0xfc(SB)/4, $0xeb86d391
298 // Global definition: RODATA (from textflag.h, value 8) places the 256-byte table in read-only data
299 GLOBL ·table(SB),RODATA,$256
300
View as plain text