1
2
3
4
5
6
7 package elliptic
8
9 import (
10 "crypto/subtle"
11 "internal/cpu"
12 "math/big"
13 "unsafe"
14 )
15
// Byte offsets of two feature flags inside the cpu.S390X structure.
//
// NOTE(review): no Go code in this file reads these constants; presumably
// they exist for the accompanying assembly. Confirm before renaming or
// removing them. Note that offsetS390xHasVE1 refers to the HasVXE field even
// though its name says "VE1".
const (
	offsetS390xHasVX  = unsafe.Offsetof(cpu.S390X.HasVX)
	offsetS390xHasVE1 = unsafe.Offsetof(cpu.S390X.HasVXE)
)
20
// p256CurveFast is the P-256 implementation that initP256Arch installs when
// cpu.S390X.HasVX is set. It embeds the curve parameters and carries no other
// state.
type p256CurveFast struct {
	*CurveParams
}
24
// p256Point is a curve point stored as three 32-byte coordinates.
//
// p256PointToAffine recovers the affine point as (x/z², y/z³) and then runs
// the result through p256FromMont, so the coordinates are Jacobian and are
// kept in the Montgomery domain. Values are written from fromBig output and
// read back with big.Int.SetBytes, i.e. the bytes are big-endian.
type p256Point struct {
	x [32]byte
	y [32]byte
	z [32]byte
}
30
var (
	// p256 is the P-256 implementation chosen by initP256Arch.
	p256 Curve
	// p256PreFast is the precomputed table allocated and filled by initTable
	// and read by p256BaseMult. Entry [i][j] holds the normalised x and y of
	// (j+1)·2^(7i) times the base point hard-coded in initTable; the z field
	// of the entries is never written.
	p256PreFast *[37][64]p256Point
)
35
36
// The six routines below are declared without a Go body, so their
// implementations live outside this file (presumably in assembly). None of
// them is called from Go code in this file.

// p256MulInternalTrampolineSetup has no Go body.
// NOTE(review): judging by the name it selects between p256MulInternalVX and
// p256MulInternalVMSL at run time — confirm against the assembly.
func p256MulInternalTrampolineSetup()

// p256SqrInternalTrampolineSetup has no Go body.
// NOTE(review): judging by the name it selects between p256SqrInternalVX and
// p256SqrInternalVMSL at run time — confirm against the assembly.
func p256SqrInternalTrampolineSetup()

// p256MulInternalVX has no Go body.
// NOTE(review): presumably the multiplication routine for the VX facility.
func p256MulInternalVX()

// p256MulInternalVMSL has no Go body.
// NOTE(review): presumably the multiplication routine built on the VMSL
// instruction.
func p256MulInternalVMSL()

// p256SqrInternalVX has no Go body.
// NOTE(review): presumably the squaring routine for the VX facility.
func p256SqrInternalVX()

// p256SqrInternalVMSL has no Go body.
// NOTE(review): presumably the squaring routine built on the VMSL
// instruction.
func p256SqrInternalVMSL()
53
54 func initP256Arch() {
55 if cpu.S390X.HasVX {
56 p256 = p256CurveFast{p256Params}
57 initTable()
58 return
59 }
60
61
62 p256 = p256Curve{p256Params}
63 return
64 }
65
// Params returns the curve parameters embedded in the receiver.
func (curve p256CurveFast) Params() *CurveParams {
	return curve.CurveParams
}
69
70
71
72
73
// p256SqrAsm has no Go body; it is implemented outside this file.
// NOTE(review): from its use in p256Sqr and p256Inverse it appears to store
// the Montgomery square of in1, modulo the field prime, into res — confirm
// against the assembly.
func p256SqrAsm(res, in1 []byte)

// p256MulAsm has no Go body; it is implemented outside this file.
// NOTE(review): from its call sites (multiplying by rr to enter the
// Montgomery domain, chained products in p256Inverse) it appears to store the
// Montgomery product of in1 and in2, modulo the field prime, into res.
// Callers in this file pass a res that aliases an input.
func p256MulAsm(res, in1, in2 []byte)
78
79
// p256Sqr is a thin wrapper that forwards to p256SqrAsm, which writes its
// result for in into res.
func p256Sqr(res, in []byte) {
	p256SqrAsm(res, in)
}
83
84
85
86
// p256FromMont has no Go body; it is implemented outside this file.
// NOTE(review): p256PointToAffine calls it in place on each coordinate right
// before building the big.Int results, so it presumably converts in from the
// Montgomery domain back to a plain residue and stores it in res — confirm.
func p256FromMont(res, in []byte)
88
89
90
91
// p256NegCond has no Go body; it is implemented outside this file.
// NOTE(review): callers pass the sign returned by boothW5/boothW7 as cond, so
// it presumably negates val when cond is 1 and leaves it untouched when cond
// is 0 — confirm against the assembly.
func p256NegCond(val *p256Point, cond int)
93
94
95
96
// p256MovCond has no Go body; it is implemented outside this file.
// NOTE(review): the call sites (e.g. CombinedMult replacing the sum with r1
// when r2IsInfinity is non-zero) suggest res = a when cond != 0 and res = b
// when cond == 0 — confirm against the assembly.
func p256MovCond(res, a, b *p256Point, cond int)
98
99
100
101
// p256Select has no Go body; it is implemented outside this file.
// p256ScalarMult calls it with a 16-entry table whose entry i holds (i+1)·P
// and with an idx produced by boothW5.
// NOTE(review): idx therefore looks 1-based, with 0 meaning "no entry";
// whether the lookup is constant time cannot be seen from here — confirm.
func p256Select(point *p256Point, table []p256Point, idx int)

// p256SelectBase has no Go body; it is implemented outside this file.
// p256BaseMult calls it with one 64-entry row of p256PreFast and with an idx
// produced by boothW7.
// NOTE(review): presumably the same lookup as p256Select but for the base
// point table, whose entries only carry x and y — confirm.
func p256SelectBase(point *p256Point, table []p256Point, idx int)
106
107
108
109
// p256OrdMul has no Go body; it is implemented outside this file.
// NOTE(review): Inverse uses it for arithmetic modulo p256Params.N and
// finishes by multiplying with a plain 1, which suggests a Montgomery
// multiplication modulo the group order — confirm against the assembly.
// Callers in this file pass a res that aliases the inputs.
func p256OrdMul(res, in1, in2 []byte)
111
112
113 func p256OrdSqr(res, in []byte, n int) {
114 copy(res, in)
115 for i := 0; i < n; i += 1 {
116 p256OrdMul(res, res, res)
117 }
118 }
119
120
121
122
123
124
125
// p256PointAddAffineAsm has no Go body; it is implemented outside this file.
// p256BaseMult calls it as P3 = P1 = accumulator and P2 = the table entry
// just selected, passing the Booth sign, the current selector and the OR of
// all previous selectors.
// NOTE(review): this suggests P2 is negated when sign == 1, P3 = P1 when
// sel == 0, and P3 = P2 when zero == 0 (nothing accumulated yet) — confirm
// against the assembly.
func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
127
128
129
130
// p256PointAddAsm has no Go body; it is implemented outside this file.
// NOTE(review): presumably P3 = P1 + P2. CombinedMult uses the returned int
// as a "points were equal" flag to substitute a doubling, so the addition
// itself should not be relied on for equal inputs — confirm against the
// assembly. Callers in this file pass a P3 that aliases P1.
func p256PointAddAsm(P3, P1, P2 *p256Point) int
132
133
// p256PointDoubleAsm has no Go body; it is implemented outside this file.
// NOTE(review): presumably P3 = 2·P1 — confirm against the assembly. Callers
// in this file pass a P3 that aliases P1.
func p256PointDoubleAsm(P3, P1 *p256Point)
135
136 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
137 if k.Cmp(p256Params.N) >= 0 {
138
139 reducedK := new(big.Int).Mod(k, p256Params.N)
140 k = reducedK
141 }
142
143
144
145 var table [15][32]byte
146
147 x := fromBig(k)
148
149
150
151
152
153
154 RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
155 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
156
157 p256OrdMul(table[0][:], x, RR)
158
159
160
161 for i := 2; i < 16; i += 2 {
162 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
163 p256OrdMul(table[i][:], table[i-1][:], table[0][:])
164 }
165
166 copy(x, table[14][:])
167
168 p256OrdSqr(x[0:32], x[0:32], 4)
169 p256OrdMul(x[0:32], x[0:32], table[14][:])
170 t := make([]byte, 32)
171 copy(t, x)
172
173 p256OrdSqr(x, x, 8)
174 p256OrdMul(x, x, t)
175 copy(t, x)
176
177 p256OrdSqr(x, x, 16)
178 p256OrdMul(x, x, t)
179 copy(t, x)
180
181 p256OrdSqr(x, x, 64)
182 p256OrdMul(x, x, t)
183 p256OrdSqr(x, x, 32)
184 p256OrdMul(x, x, t)
185
186
187 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
188 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
189 for i := 0; i < 32; i++ {
190 p256OrdSqr(x, x, 4)
191 p256OrdMul(x, x, table[expLo[i]-1][:])
192 }
193
194
195
196 one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
198 p256OrdMul(x, x, one)
199
200 return new(big.Int).SetBytes(x)
201 }
202
203
// fromBig returns the magnitude of n as exactly 32 big-endian bytes, padding
// with leading zeros when n needs fewer.
//
// When n already occupies exactly 32 bytes the slice produced by n.Bytes()
// is returned as is. n must fit in 32 bytes; a larger value makes the
// function panic, so callers reduce their input first. The sign of n is
// ignored because big.Int.Bytes encodes the absolute value.
func fromBig(n *big.Int) []byte {
	raw := n.Bytes()
	if len(raw) == 32 {
		return raw
	}

	out := make([]byte, 32)
	// Right-align the significant bytes; the leading bytes stay zero.
	copy(out[32-len(raw):], raw)
	return out
}
217
218
219
220 func p256GetMultiplier(in []byte) []byte {
221 n := new(big.Int).SetBytes(in)
222
223 if n.Cmp(p256Params.N) >= 0 {
224 n.Mod(n, p256Params.N)
225 }
226 return fromBig(n)
227 }
228
229
230
231
// rr is the big-endian constant that ScalarMult and CombinedMult multiply
// affine coordinates by (via p256MulAsm) before handing them to the point
// routines.
// NOTE(review): expected to be R² mod p for R = 2^256 and p the field prime,
// i.e. the factor that moves a value into the Montgomery domain — confirm.
var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}

// one is the big-endian value 2^256 - p (p being the P-256 field prime), which
// equals 2^256 mod p. It is copied into the z coordinate of points that start
// out affine. NOTE(review): that makes it the Montgomery form of 1, assuming
// R = 2^256 — confirm.
var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
238
239 func maybeReduceModP(in *big.Int) *big.Int {
240 if in.Cmp(p256Params.P) < 0 {
241 return in
242 }
243 return new(big.Int).Mod(in, p256Params.P)
244 }
245
246 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
247 var r1, r2 p256Point
248 scalarReduced := p256GetMultiplier(baseScalar)
249 r1IsInfinity := scalarIsZero(scalarReduced)
250 r1.p256BaseMult(scalarReduced)
251
252 copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
253 copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
254 copy(r2.z[:], one)
255 p256MulAsm(r2.x[:], r2.x[:], rr[:])
256 p256MulAsm(r2.y[:], r2.y[:], rr[:])
257
258 scalarReduced = p256GetMultiplier(scalar)
259 r2IsInfinity := scalarIsZero(scalarReduced)
260 r2.p256ScalarMult(p256GetMultiplier(scalar))
261
262 var sum, double p256Point
263 pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
264 p256PointDoubleAsm(&double, &r1)
265 p256MovCond(&sum, &double, &sum, pointsEqual)
266 p256MovCond(&sum, &r1, &sum, r2IsInfinity)
267 p256MovCond(&sum, &r2, &sum, r1IsInfinity)
268 return sum.p256PointToAffine()
269 }
270
271 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
272 var r p256Point
273 r.p256BaseMult(p256GetMultiplier(scalar))
274 return r.p256PointToAffine()
275 }
276
277 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
278 var r p256Point
279 copy(r.x[:], fromBig(maybeReduceModP(bigX)))
280 copy(r.y[:], fromBig(maybeReduceModP(bigY)))
281 copy(r.z[:], one)
282 p256MulAsm(r.x[:], r.x[:], rr[:])
283 p256MulAsm(r.y[:], r.y[:], rr[:])
284 r.p256ScalarMult(p256GetMultiplier(scalar))
285 return r.p256PointToAffine()
286 }
287
288
289
// scalarIsZero returns 1 when every byte of scalar is zero (including the
// empty slice) and 0 otherwise. All bytes are OR-ed together without an early
// exit and the final comparison uses subtle.ConstantTimeByteEq.
func scalarIsZero(scalar []byte) int {
	var acc byte
	for i := range scalar {
		acc |= scalar[i]
	}
	return subtle.ConstantTimeByteEq(acc, 0)
}
297
298 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
299 zInv := make([]byte, 32)
300 zInvSq := make([]byte, 32)
301
302 p256Inverse(zInv, p.z[:])
303 p256Sqr(zInvSq, zInv)
304 p256MulAsm(zInv, zInv, zInvSq)
305
306 p256MulAsm(zInvSq, p.x[:], zInvSq)
307 p256MulAsm(zInv, p.y[:], zInv)
308
309 p256FromMont(zInvSq, zInvSq)
310 p256FromMont(zInv, zInv)
311
312 return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
313 }
314
315
316 func p256Inverse(out, in []byte) {
317 var stack [6 * 32]byte
318 p2 := stack[32*0 : 32*0+32]
319 p4 := stack[32*1 : 32*1+32]
320 p8 := stack[32*2 : 32*2+32]
321 p16 := stack[32*3 : 32*3+32]
322 p32 := stack[32*4 : 32*4+32]
323
324 p256Sqr(out, in)
325 p256MulAsm(p2, out, in)
326
327 p256Sqr(out, p2)
328 p256Sqr(out, out)
329 p256MulAsm(p4, out, p2)
330
331 p256Sqr(out, p4)
332 p256Sqr(out, out)
333 p256Sqr(out, out)
334 p256Sqr(out, out)
335 p256MulAsm(p8, out, p4)
336
337 p256Sqr(out, p8)
338
339 for i := 0; i < 7; i++ {
340 p256Sqr(out, out)
341 }
342 p256MulAsm(p16, out, p8)
343
344 p256Sqr(out, p16)
345 for i := 0; i < 15; i++ {
346 p256Sqr(out, out)
347 }
348 p256MulAsm(p32, out, p16)
349
350 p256Sqr(out, p32)
351
352 for i := 0; i < 31; i++ {
353 p256Sqr(out, out)
354 }
355 p256MulAsm(out, out, in)
356
357 for i := 0; i < 32*4; i++ {
358 p256Sqr(out, out)
359 }
360 p256MulAsm(out, out, p32)
361
362 for i := 0; i < 32; i++ {
363 p256Sqr(out, out)
364 }
365 p256MulAsm(out, out, p32)
366
367 for i := 0; i < 16; i++ {
368 p256Sqr(out, out)
369 }
370 p256MulAsm(out, out, p16)
371
372 for i := 0; i < 8; i++ {
373 p256Sqr(out, out)
374 }
375 p256MulAsm(out, out, p8)
376
377 p256Sqr(out, out)
378 p256Sqr(out, out)
379 p256Sqr(out, out)
380 p256Sqr(out, out)
381 p256MulAsm(out, out, p4)
382
383 p256Sqr(out, out)
384 p256Sqr(out, out)
385 p256MulAsm(out, out, p2)
386
387 p256Sqr(out, out)
388 p256Sqr(out, out)
389 p256MulAsm(out, out, in)
390 }
391
// boothW5 recodes a 6-bit window (five scalar bits plus the top bit of the
// window below it) into a signed digit, returned as (magnitude, sign) with
// the magnitude in 0..16 and sign 1 meaning negative. For example 1 and 2
// both give (1, 0), 31 gives (16, 0), 32 gives (16, 1) and 63 gives (0, 1).
// The recoding is branch-free. The description holds for in < 64.
func boothW5(in uint) (int, int) {
	// negMask is all ones when bit 5 of in is set and zero otherwise.
	negMask := ^((in >> 5) - 1)
	// Reflection of in within the 6-bit range, used for negative digits.
	flipped := (1<<6 - 1) - in
	digit := (flipped & negMask) | (in &^ negMask)
	// Halve, rounding up.
	digit = digit>>1 + digit&1
	return int(digit), int(negMask & 1)
}
399
// boothW7 recodes an 8-bit window (seven scalar bits plus the top bit of the
// window below it) into a signed digit, returned as (magnitude, sign) with
// the magnitude in 0..64 and sign 1 meaning negative. For example 1 and 2
// both give (1, 0), 127 gives (64, 0), 128 gives (64, 1) and 255 gives
// (0, 1). The recoding is branch-free. The description holds for in < 256.
func boothW7(in uint) (int, int) {
	// negMask is all ones when bit 7 of in is set and zero otherwise.
	negMask := ^((in >> 7) - 1)
	// Reflection of in within the 8-bit range, used for negative digits.
	flipped := (1<<8 - 1) - in
	digit := (flipped & negMask) | (in &^ negMask)
	// Halve, rounding up.
	digit = digit>>1 + digit&1
	return int(digit), int(negMask & 1)
}
407
408 func initTable() {
409 p256PreFast = new([37][64]p256Point)
410 basePoint := p256Point{
411 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
412 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c},
413 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
414 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a},
415 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
416 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
417 }
418
419 t1 := new(p256Point)
420 t2 := new(p256Point)
421 *t2 = basePoint
422
423 zInv := make([]byte, 32)
424 zInvSq := make([]byte, 32)
425 for j := 0; j < 64; j++ {
426 *t1 = *t2
427 for i := 0; i < 37; i++ {
428
429 if i != 0 {
430 for k := 0; k < 7; k++ {
431 p256PointDoubleAsm(t1, t1)
432 }
433 }
434
435
436 p256Inverse(zInv, t1.z[:])
437 p256Sqr(zInvSq, zInv)
438 p256MulAsm(zInv, zInv, zInvSq)
439
440 p256MulAsm(t1.x[:], t1.x[:], zInvSq)
441 p256MulAsm(t1.y[:], t1.y[:], zInv)
442
443 copy(t1.z[:], basePoint.z[:])
444
445 copy(p256PreFast[i][j].x[:], t1.x[:])
446 copy(p256PreFast[i][j].y[:], t1.y[:])
447 }
448 if j == 0 {
449 p256PointDoubleAsm(t2, &basePoint)
450 } else {
451 p256PointAddAsm(t2, t2, &basePoint)
452 }
453 }
454 }
455
456 func (p *p256Point) p256BaseMult(scalar []byte) {
457 wvalue := (uint(scalar[31]) << 1) & 0xff
458 sel, sign := boothW7(uint(wvalue))
459 p256SelectBase(p, p256PreFast[0][:], sel)
460 p256NegCond(p, sign)
461
462 copy(p.z[:], one[:])
463 var t0 p256Point
464
465 copy(t0.z[:], one[:])
466
467 index := uint(6)
468 zero := sel
469
470 for i := 1; i < 37; i++ {
471 if index < 247 {
472 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
473 } else {
474 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
475 }
476 index += 7
477 sel, sign = boothW7(uint(wvalue))
478 p256SelectBase(&t0, p256PreFast[i][:], sel)
479 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
480 zero |= sel
481 }
482 }
483
484 func (p *p256Point) p256ScalarMult(scalar []byte) {
485
486
487 var precomp [16]p256Point
488 var t0, t1, t2, t3 p256Point
489
490
491 *&precomp[0] = *p
492
493 p256PointDoubleAsm(&t0, p)
494 p256PointDoubleAsm(&t1, &t0)
495 p256PointDoubleAsm(&t2, &t1)
496 p256PointDoubleAsm(&t3, &t2)
497 *&precomp[1] = t0
498 *&precomp[3] = t1
499 *&precomp[7] = t2
500 *&precomp[15] = t3
501
502 p256PointAddAsm(&t0, &t0, p)
503 p256PointAddAsm(&t1, &t1, p)
504 p256PointAddAsm(&t2, &t2, p)
505 *&precomp[2] = t0
506 *&precomp[4] = t1
507 *&precomp[8] = t2
508
509 p256PointDoubleAsm(&t0, &t0)
510 p256PointDoubleAsm(&t1, &t1)
511 *&precomp[5] = t0
512 *&precomp[9] = t1
513
514 p256PointAddAsm(&t2, &t0, p)
515 p256PointAddAsm(&t1, &t1, p)
516 *&precomp[6] = t2
517 *&precomp[10] = t1
518
519 p256PointDoubleAsm(&t0, &t0)
520 p256PointDoubleAsm(&t2, &t2)
521 *&precomp[11] = t0
522 *&precomp[13] = t2
523
524 p256PointAddAsm(&t0, &t0, p)
525 p256PointAddAsm(&t2, &t2, p)
526 *&precomp[12] = t0
527 *&precomp[14] = t2
528
529
530 index := uint(254)
531 var sel, sign int
532
533 wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
534 sel, _ = boothW5(uint(wvalue))
535 p256Select(p, precomp[:], sel)
536 zero := sel
537
538 for index > 4 {
539 index -= 5
540 p256PointDoubleAsm(p, p)
541 p256PointDoubleAsm(p, p)
542 p256PointDoubleAsm(p, p)
543 p256PointDoubleAsm(p, p)
544 p256PointDoubleAsm(p, p)
545
546 if index < 247 {
547 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
548 } else {
549 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
550 }
551
552 sel, sign = boothW5(uint(wvalue))
553
554 p256Select(&t0, precomp[:], sel)
555 p256NegCond(&t0, sign)
556 p256PointAddAsm(&t1, p, &t0)
557 p256MovCond(&t1, &t1, p, sel)
558 p256MovCond(p, &t1, &t0, zero)
559 zero |= sel
560 }
561
562 p256PointDoubleAsm(p, p)
563 p256PointDoubleAsm(p, p)
564 p256PointDoubleAsm(p, p)
565 p256PointDoubleAsm(p, p)
566 p256PointDoubleAsm(p, p)
567
568 wvalue = (uint(scalar[31]) << 1) & 0x3f
569 sel, sign = boothW5(uint(wvalue))
570
571 p256Select(&t0, precomp[:], sel)
572 p256NegCond(&t0, sign)
573 p256PointAddAsm(&t1, p, &t0)
574 p256MovCond(&t1, &t1, p, sel)
575 p256MovCond(p, &t1, &t0, zero)
576 }
577
View as plain text