Source file src/math/big/float.go
1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements multi-precision floating-point numbers. 6 // Like in the GNU MPFR library (https://www.mpfr.org/), operands 7 // can be of mixed precision. Unlike MPFR, the rounding mode is 8 // not specified with each operation, but with each operand. The 9 // rounding mode of the result operand determines the rounding 10 // mode of an operation. This is a from-scratch implementation. 11 12 package big 13 14 import ( 15 "fmt" 16 "math" 17 "math/bits" 18 ) 19 20 const debugFloat = false // enable for debugging 21 22 // A nonzero finite Float represents a multi-precision floating point number 23 // 24 // sign × mantissa × 2**exponent 25 // 26 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 27 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 28 // All Floats are ordered, and the ordering of two Floats x and y 29 // is defined by x.Cmp(y). 30 // 31 // Each Float value also has a precision, rounding mode, and accuracy. 32 // The precision is the maximum number of mantissa bits available to 33 // represent the value. The rounding mode specifies how a result should 34 // be rounded to fit into the mantissa bits, and accuracy describes the 35 // rounding error with respect to the exact result. 36 // 37 // Unless specified otherwise, all operations (including setters) that 38 // specify a *Float variable for the result (usually via the receiver 39 // with the exception of MantExp), round the numeric result according 40 // to the precision and rounding mode of the result variable. 41 // 42 // If the provided result precision is 0 (see below), it is set to the 43 // precision of the argument with the largest precision value before any 44 // rounding takes place, and the rounding mode remains unchanged. Thus, 45 // uninitialized Floats provided as result arguments will have their 46 // precision set to a reasonable value determined by the operands, and 47 // their mode is the zero value for RoundingMode (ToNearestEven). 48 // 49 // By setting the desired precision to 24 or 53 and using matching rounding 50 // mode (typically ToNearestEven), Float operations produce the same results 51 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 52 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 53 // Exponent underflow and overflow lead to a 0 or an Infinity for different 54 // values than IEEE-754 because Float exponents have a much larger range. 55 // 56 // The zero (uninitialized) value for a Float is ready to use and represents 57 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 58 // 59 // Operations always take pointer arguments (*Float) rather 60 // than Float values, and each unique Float value requires 61 // its own unique *Float pointer. To "copy" a Float value, 62 // an existing (or newly allocated) Float must be set to 63 // a new value using the Float.Set method; shallow copies 64 // of Floats are not supported and may lead to errors. 65 type Float struct { 66 prec uint32 67 mode RoundingMode 68 acc Accuracy 69 form form 70 neg bool 71 mant nat 72 exp int32 73 } 74 75 // An ErrNaN panic is raised by a Float operation that would lead to 76 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface. 77 type ErrNaN struct { 78 msg string 79 } 80 81 func (err ErrNaN) Error() string { 82 return err.msg 83 } 84 85 // NewFloat allocates and returns a new Float set to x, 86 // with precision 53 and rounding mode ToNearestEven. 87 // NewFloat panics with ErrNaN if x is a NaN. 88 func NewFloat(x float64) *Float { 89 if math.IsNaN(x) { 90 panic(ErrNaN{"NewFloat(NaN)"}) 91 } 92 return new(Float).SetFloat64(x) 93 } 94 95 // Exponent and precision limits. 96 const ( 97 MaxExp = math.MaxInt32 // largest supported exponent 98 MinExp = math.MinInt32 // smallest supported exponent 99 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 100 ) 101 102 // Internal representation: The mantissa bits x.mant of a nonzero finite 103 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 104 // the slice may (but doesn't have to) be shorter if the mantissa contains 105 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 106 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 107 // trailing 0 bits or x.prec is not a multiple of the Word size _W, 108 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 109 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 110 // 111 // A zero or non-finite Float x ignores x.mant and x.exp. 112 // 113 // x form neg mant exp 114 // ---------------------------------------------------------- 115 // ±0 zero sign - - 116 // 0 < |x| < +Inf finite sign mantissa exponent 117 // ±Inf inf sign - - 118 119 // A form value describes the internal representation. 120 type form byte 121 122 // The form value order is relevant - do not change! 123 const ( 124 zero form = iota 125 finite 126 inf 127 ) 128 129 // RoundingMode determines how a Float value is rounded to the 130 // desired precision. Rounding may change the Float value; the 131 // rounding error is described by the Float's Accuracy. 132 type RoundingMode byte 133 134 // These constants define supported rounding modes. 135 const ( 136 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 137 ToNearestAway // == IEEE 754-2008 roundTiesToAway 138 ToZero // == IEEE 754-2008 roundTowardZero 139 AwayFromZero // no IEEE 754-2008 equivalent 140 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 141 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 142 ) 143 144 //go:generate stringer -type=RoundingMode 145 146 // Accuracy describes the rounding error produced by the most recent 147 // operation that generated a Float value, relative to the exact value. 148 type Accuracy int8 149 150 // Constants describing the Accuracy of a Float. 151 const ( 152 Below Accuracy = -1 153 Exact Accuracy = 0 154 Above Accuracy = +1 155 ) 156 157 //go:generate stringer -type=Accuracy 158 159 // SetPrec sets z's precision to prec and returns the (possibly) rounded 160 // value of z. Rounding occurs according to z's rounding mode if the mantissa 161 // cannot be represented in prec bits without loss of precision. 162 // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 163 // If prec > MaxPrec, it is set to MaxPrec. 164 func (z *Float) SetPrec(prec uint) *Float { 165 z.acc = Exact // optimistically assume no rounding is needed 166 167 // special case 168 if prec == 0 { 169 z.prec = 0 170 if z.form == finite { 171 // truncate z to 0 172 z.acc = makeAcc(z.neg) 173 z.form = zero 174 } 175 return z 176 } 177 178 // general case 179 if prec > MaxPrec { 180 prec = MaxPrec 181 } 182 old := z.prec 183 z.prec = uint32(prec) 184 if z.prec < old { 185 z.round(0) 186 } 187 return z 188 } 189 190 func makeAcc(above bool) Accuracy { 191 if above { 192 return Above 193 } 194 return Below 195 } 196 197 // SetMode sets z's rounding mode to mode and returns an exact z. 198 // z remains unchanged otherwise. 199 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 200 func (z *Float) SetMode(mode RoundingMode) *Float { 201 z.mode = mode 202 z.acc = Exact 203 return z 204 } 205 206 // Prec returns the mantissa precision of x in bits. 207 // The result may be 0 for |x| == 0 and |x| == Inf. 208 func (x *Float) Prec() uint { 209 return uint(x.prec) 210 } 211 212 // MinPrec returns the minimum precision required to represent x exactly 213 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 214 // The result is 0 for |x| == 0 and |x| == Inf. 215 func (x *Float) MinPrec() uint { 216 if x.form != finite { 217 return 0 218 } 219 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 220 } 221 222 // Mode returns the rounding mode of x. 223 func (x *Float) Mode() RoundingMode { 224 return x.mode 225 } 226 227 // Acc returns the accuracy of x produced by the most recent 228 // operation, unless explicitly documented otherwise by that 229 // operation. 230 func (x *Float) Acc() Accuracy { 231 return x.acc 232 } 233 234 // Sign returns: 235 // 236 // -1 if x < 0 237 // 0 if x is ±0 238 // +1 if x > 0 239 // 240 func (x *Float) Sign() int { 241 if debugFloat { 242 x.validate() 243 } 244 if x.form == zero { 245 return 0 246 } 247 if x.neg { 248 return -1 249 } 250 return 1 251 } 252 253 // MantExp breaks x into its mantissa and exponent components 254 // and returns the exponent. If a non-nil mant argument is 255 // provided its value is set to the mantissa of x, with the 256 // same precision and rounding mode as x. The components 257 // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 258 // Calling MantExp with a nil argument is an efficient way to 259 // get the exponent of the receiver. 260 // 261 // Special cases are: 262 // 263 // ( ±0).MantExp(mant) = 0, with mant set to ±0 264 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf 265 // 266 // x and mant may be the same in which case x is set to its 267 // mantissa value. 268 func (x *Float) MantExp(mant *Float) (exp int) { 269 if debugFloat { 270 x.validate() 271 } 272 if x.form == finite { 273 exp = int(x.exp) 274 } 275 if mant != nil { 276 mant.Copy(x) 277 if mant.form == finite { 278 mant.exp = 0 279 } 280 } 281 return 282 } 283 284 func (z *Float) setExpAndRound(exp int64, sbit uint) { 285 if exp < MinExp { 286 // underflow 287 z.acc = makeAcc(z.neg) 288 z.form = zero 289 return 290 } 291 292 if exp > MaxExp { 293 // overflow 294 z.acc = makeAcc(!z.neg) 295 z.form = inf 296 return 297 } 298 299 z.form = finite 300 z.exp = int32(exp) 301 z.round(sbit) 302 } 303 304 // SetMantExp sets z to mant × 2**exp and returns z. 305 // The result z has the same precision and rounding mode 306 // as mant. SetMantExp is an inverse of MantExp but does 307 // not require 0.5 <= |mant| < 1.0. Specifically, for a 308 // given x of type *Float, SetMantExp relates to MantExp 309 // as follows: 310 // 311 // mant := new(Float) 312 // new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 313 // 314 // Special cases are: 315 // 316 // z.SetMantExp( ±0, exp) = ±0 317 // z.SetMantExp(±Inf, exp) = ±Inf 318 // 319 // z and mant may be the same in which case z's exponent 320 // is set to exp. 321 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 322 if debugFloat { 323 z.validate() 324 mant.validate() 325 } 326 z.Copy(mant) 327 328 if z.form == finite { 329 // 0 < |mant| < +Inf 330 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 331 } 332 return z 333 } 334 335 // Signbit reports whether x is negative or negative zero. 336 func (x *Float) Signbit() bool { 337 return x.neg 338 } 339 340 // IsInf reports whether x is +Inf or -Inf. 341 func (x *Float) IsInf() bool { 342 return x.form == inf 343 } 344 345 // IsInt reports whether x is an integer. 346 // ±Inf values are not integers. 347 func (x *Float) IsInt() bool { 348 if debugFloat { 349 x.validate() 350 } 351 // special cases 352 if x.form != finite { 353 return x.form == zero 354 } 355 // x.form == finite 356 if x.exp <= 0 { 357 return false 358 } 359 // x.exp > 0 360 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 361 } 362 363 // debugging support 364 func (x *Float) validate() { 365 if !debugFloat { 366 // avoid performance bugs 367 panic("validate called but debugFloat is not set") 368 } 369 if x.form != finite { 370 return 371 } 372 m := len(x.mant) 373 if m == 0 { 374 panic("nonzero finite number with empty mantissa") 375 } 376 const msb = 1 << (_W - 1) 377 if x.mant[m-1]&msb == 0 { 378 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) 379 } 380 if x.prec == 0 { 381 panic("zero precision finite number") 382 } 383 } 384 385 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 386 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 387 // have before calling round. z's mantissa must be normalized (with the msb set) 388 // or empty. 389 // 390 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 391 // sign of z. For correct rounding, the sign of z must be set correctly before 392 // calling round. 393 func (z *Float) round(sbit uint) { 394 if debugFloat { 395 z.validate() 396 } 397 398 z.acc = Exact 399 if z.form != finite { 400 // ±0 or ±Inf => nothing left to do 401 return 402 } 403 // z.form == finite && len(z.mant) > 0 404 // m > 0 implies z.prec > 0 (checked by validate) 405 406 m := uint32(len(z.mant)) // present mantissa length in words 407 bits := m * _W // present mantissa bits; bits > 0 408 if bits <= z.prec { 409 // mantissa fits => nothing to do 410 return 411 } 412 // bits > z.prec 413 414 // Rounding is based on two bits: the rounding bit (rbit) and the 415 // sticky bit (sbit). The rbit is the bit immediately before the 416 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 417 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 418 // 419 // rbit sbit => "fractional part" 420 // 421 // 0 0 == 0 422 // 0 1 > 0 , < 0.5 423 // 1 0 == 0.5 424 // 1 1 > 0.5, < 1.0 425 426 // bits > z.prec: mantissa too large => round 427 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 428 rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit 429 // The sticky bit is only needed for rounding ToNearestEven 430 // or when the rounding bit is zero. Avoid computation otherwise. 431 if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) { 432 sbit = z.mant.sticky(r) 433 } 434 sbit &= 1 // be safe and ensure it's a single bit 435 436 // cut off extra words 437 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 438 if m > n { 439 copy(z.mant, z.mant[m-n:]) // move n last words to front 440 z.mant = z.mant[:n] 441 } 442 443 // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word 444 ntz := n*_W - z.prec // 0 <= ntz < _W 445 lsb := Word(1) << ntz 446 447 // round if result is inexact 448 if rbit|sbit != 0 { 449 // Make rounding decision: The result mantissa is truncated ("rounded down") 450 // by default. Decide if we need to increment, or "round up", the (unsigned) 451 // mantissa. 452 inc := false 453 switch z.mode { 454 case ToNegativeInf: 455 inc = z.neg 456 case ToZero: 457 // nothing to do 458 case ToNearestEven: 459 inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) 460 case ToNearestAway: 461 inc = rbit != 0 462 case AwayFromZero: 463 inc = true 464 case ToPositiveInf: 465 inc = !z.neg 466 default: 467 panic("unreachable") 468 } 469 470 // A positive result (!z.neg) is Above the exact result if we increment, 471 // and it's Below if we truncate (Exact results require no rounding). 472 // For a negative result (z.neg) it is exactly the opposite. 473 z.acc = makeAcc(inc != z.neg) 474 475 if inc { 476 // add 1 to mantissa 477 if addVW(z.mant, z.mant, lsb) != 0 { 478 // mantissa overflow => adjust exponent 479 if z.exp >= MaxExp { 480 // exponent overflow 481 z.form = inf 482 return 483 } 484 z.exp++ 485 // adjust mantissa: divide by 2 to compensate for exponent adjustment 486 shrVU(z.mant, z.mant, 1) 487 // set msb == carry == 1 from the mantissa overflow above 488 const msb = 1 << (_W - 1) 489 z.mant[n-1] |= msb 490 } 491 } 492 } 493 494 // zero out trailing bits in least-significant word 495 z.mant[0] &^= lsb - 1 496 497 if debugFloat { 498 z.validate() 499 } 500 } 501 502 func (z *Float) setBits64(neg bool, x uint64) *Float { 503 if z.prec == 0 { 504 z.prec = 64 505 } 506 z.acc = Exact 507 z.neg = neg 508 if x == 0 { 509 z.form = zero 510 return z 511 } 512 // x != 0 513 z.form = finite 514 s := bits.LeadingZeros64(x) 515 z.mant = z.mant.setUint64(x << uint(s)) 516 z.exp = int32(64 - s) // always fits 517 if z.prec < 64 { 518 z.round(0) 519 } 520 return z 521 } 522 523 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 524 // If z's precision is 0, it is changed to 64 (and rounding will have 525 // no effect). 526 func (z *Float) SetUint64(x uint64) *Float { 527 return z.setBits64(false, x) 528 } 529 530 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 531 // If z's precision is 0, it is changed to 64 (and rounding will have 532 // no effect). 533 func (z *Float) SetInt64(x int64) *Float { 534 u := x 535 if u < 0 { 536 u = -u 537 } 538 // We cannot simply call z.SetUint64(uint64(u)) and change 539 // the sign afterwards because the sign affects rounding. 540 return z.setBits64(x < 0, uint64(u)) 541 } 542 543 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 544 // If z's precision is 0, it is changed to 53 (and rounding will have 545 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 546 func (z *Float) SetFloat64(x float64) *Float { 547 if z.prec == 0 { 548 z.prec = 53 549 } 550 if math.IsNaN(x) { 551 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 552 } 553 z.acc = Exact 554 z.neg = math.Signbit(x) // handle -0, -Inf correctly 555 if x == 0 { 556 z.form = zero 557 return z 558 } 559 if math.IsInf(x, 0) { 560 z.form = inf 561 return z 562 } 563 // normalized x != 0 564 z.form = finite 565 fmant, exp := math.Frexp(x) // get normalized mantissa 566 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 567 z.exp = int32(exp) // always fits 568 if z.prec < 53 { 569 z.round(0) 570 } 571 return z 572 } 573 574 // fnorm normalizes mantissa m by shifting it to the left 575 // such that the msb of the most-significant word (msw) is 1. 576 // It returns the shift amount. It assumes that len(m) != 0. 577 func fnorm(m nat) int64 { 578 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 579 panic("msw of mantissa is 0") 580 } 581 s := nlz(m[len(m)-1]) 582 if s > 0 { 583 c := shlVU(m, m, s) 584 if debugFloat && c != 0 { 585 panic("nlz or shlVU incorrect") 586 } 587 } 588 return int64(s) 589 } 590 591 // SetInt sets z to the (possibly rounded) value of x and returns z. 592 // If z's precision is 0, it is changed to the larger of x.BitLen() 593 // or 64 (and rounding will have no effect). 594 func (z *Float) SetInt(x *Int) *Float { 595 // TODO(gri) can be more efficient if z.prec > 0 596 // but small compared to the size of x, or if there 597 // are many trailing 0's. 598 bits := uint32(x.BitLen()) 599 if z.prec == 0 { 600 z.prec = umax32(bits, 64) 601 } 602 z.acc = Exact 603 z.neg = x.neg 604 if len(x.abs) == 0 { 605 z.form = zero 606 return z 607 } 608 // x != 0 609 z.mant = z.mant.set(x.abs) 610 fnorm(z.mant) 611 z.setExpAndRound(int64(bits), 0) 612 return z 613 } 614 615 // SetRat sets z to the (possibly rounded) value of x and returns z. 616 // If z's precision is 0, it is changed to the largest of a.BitLen(), 617 // b.BitLen(), or 64; with x = a/b. 618 func (z *Float) SetRat(x *Rat) *Float { 619 if x.IsInt() { 620 return z.SetInt(x.Num()) 621 } 622 var a, b Float 623 a.SetInt(x.Num()) 624 b.SetInt(x.Denom()) 625 if z.prec == 0 { 626 z.prec = umax32(a.prec, b.prec) 627 } 628 return z.Quo(&a, &b) 629 } 630 631 // SetInf sets z to the infinite Float -Inf if signbit is 632 // set, or +Inf if signbit is not set, and returns z. The 633 // precision of z is unchanged and the result is always 634 // Exact. 635 func (z *Float) SetInf(signbit bool) *Float { 636 z.acc = Exact 637 z.form = inf 638 z.neg = signbit 639 return z 640 } 641 642 // Set sets z to the (possibly rounded) value of x and returns z. 643 // If z's precision is 0, it is changed to the precision of x 644 // before setting z (and rounding will have no effect). 645 // Rounding is performed according to z's precision and rounding 646 // mode; and z's accuracy reports the result error relative to the 647 // exact (not rounded) result. 648 func (z *Float) Set(x *Float) *Float { 649 if debugFloat { 650 x.validate() 651 } 652 z.acc = Exact 653 if z != x { 654 z.form = x.form 655 z.neg = x.neg 656 if x.form == finite { 657 z.exp = x.exp 658 z.mant = z.mant.set(x.mant) 659 } 660 if z.prec == 0 { 661 z.prec = x.prec 662 } else if z.prec < x.prec { 663 z.round(0) 664 } 665 } 666 return z 667 } 668 669 // Copy sets z to x, with the same precision, rounding mode, and 670 // accuracy as x, and returns z. x is not changed even if z and 671 // x are the same. 672 func (z *Float) Copy(x *Float) *Float { 673 if debugFloat { 674 x.validate() 675 } 676 if z != x { 677 z.prec = x.prec 678 z.mode = x.mode 679 z.acc = x.acc 680 z.form = x.form 681 z.neg = x.neg 682 if z.form == finite { 683 z.mant = z.mant.set(x.mant) 684 z.exp = x.exp 685 } 686 } 687 return z 688 } 689 690 // msb32 returns the 32 most significant bits of x. 691 func msb32(x nat) uint32 { 692 i := len(x) - 1 693 if i < 0 { 694 return 0 695 } 696 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 697 panic("x not normalized") 698 } 699 switch _W { 700 case 32: 701 return uint32(x[i]) 702 case 64: 703 return uint32(x[i] >> 32) 704 } 705 panic("unreachable") 706 } 707 708 // msb64 returns the 64 most significant bits of x. 709 func msb64(x nat) uint64 { 710 i := len(x) - 1 711 if i < 0 { 712 return 0 713 } 714 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 715 panic("x not normalized") 716 } 717 switch _W { 718 case 32: 719 v := uint64(x[i]) << 32 720 if i > 0 { 721 v |= uint64(x[i-1]) 722 } 723 return v 724 case 64: 725 return uint64(x[i]) 726 } 727 panic("unreachable") 728 } 729 730 // Uint64 returns the unsigned integer resulting from truncating x 731 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 732 // if x is an integer and Below otherwise. 733 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 734 // for x > math.MaxUint64. 735 func (x *Float) Uint64() (uint64, Accuracy) { 736 if debugFloat { 737 x.validate() 738 } 739 740 switch x.form { 741 case finite: 742 if x.neg { 743 return 0, Above 744 } 745 // 0 < x < +Inf 746 if x.exp <= 0 { 747 // 0 < x < 1 748 return 0, Below 749 } 750 // 1 <= x < Inf 751 if x.exp <= 64 { 752 // u = trunc(x) fits into a uint64 753 u := msb64(x.mant) >> (64 - uint32(x.exp)) 754 if x.MinPrec() <= 64 { 755 return u, Exact 756 } 757 return u, Below // x truncated 758 } 759 // x too large 760 return math.MaxUint64, Below 761 762 case zero: 763 return 0, Exact 764 765 case inf: 766 if x.neg { 767 return 0, Above 768 } 769 return math.MaxUint64, Below 770 } 771 772 panic("unreachable") 773 } 774 775 // Int64 returns the integer resulting from truncating x towards zero. 776 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 777 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 778 // The result is (math.MinInt64, Above) for x < math.MinInt64, 779 // and (math.MaxInt64, Below) for x > math.MaxInt64. 780 func (x *Float) Int64() (int64, Accuracy) { 781 if debugFloat { 782 x.validate() 783 } 784 785 switch x.form { 786 case finite: 787 // 0 < |x| < +Inf 788 acc := makeAcc(x.neg) 789 if x.exp <= 0 { 790 // 0 < |x| < 1 791 return 0, acc 792 } 793 // x.exp > 0 794 795 // 1 <= |x| < +Inf 796 if x.exp <= 63 { 797 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 798 i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) 799 if x.neg { 800 i = -i 801 } 802 if x.MinPrec() <= uint(x.exp) { 803 return i, Exact 804 } 805 return i, acc // x truncated 806 } 807 if x.neg { 808 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 809 if x.exp == 64 && x.MinPrec() == 1 { 810 acc = Exact 811 } 812 return math.MinInt64, acc 813 } 814 // x too large 815 return math.MaxInt64, Below 816 817 case zero: 818 return 0, Exact 819 820 case inf: 821 if x.neg { 822 return math.MinInt64, Above 823 } 824 return math.MaxInt64, Below 825 } 826 827 panic("unreachable") 828 } 829 830 // Float32 returns the float32 value nearest to x. If x is too small to be 831 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 832 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 833 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 834 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 835 func (x *Float) Float32() (float32, Accuracy) { 836 if debugFloat { 837 x.validate() 838 } 839 840 switch x.form { 841 case finite: 842 // 0 < |x| < +Inf 843 844 const ( 845 fbits = 32 // float size 846 mbits = 23 // mantissa size (excluding implicit msb) 847 ebits = fbits - mbits - 1 // 8 exponent size 848 bias = 1<<(ebits-1) - 1 // 127 exponent bias 849 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 850 emin = 1 - bias // -126 smallest unbiased exponent (normal) 851 emax = bias // 127 largest unbiased exponent (normal) 852 ) 853 854 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. 855 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 856 857 // Compute precision p for float32 mantissa. 858 // If the exponent is too small, we have a denormal number before 859 // rounding and fewer than p mantissa bits of precision available 860 // (the exponent remains fixed but the mantissa gets shifted right). 861 p := mbits + 1 // precision of normal float 862 if e < emin { 863 // recompute precision 864 p = mbits + 1 - emin + int(e) 865 // If p == 0, the mantissa of x is shifted so much to the right 866 // that its msb falls immediately to the right of the float32 867 // mantissa space. In other words, if the smallest denormal is 868 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 869 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 870 // If m == 0.5, it is rounded down to even, i.e., 0.0. 871 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 872 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 873 // underflow to ±0 874 if x.neg { 875 var z float32 876 return -z, Above 877 } 878 return 0.0, Below 879 } 880 // otherwise, round up 881 // We handle p == 0 explicitly because it's easy and because 882 // Float.round doesn't support rounding to 0 bits of precision. 883 if p == 0 { 884 if x.neg { 885 return -math.SmallestNonzeroFloat32, Below 886 } 887 return math.SmallestNonzeroFloat32, Above 888 } 889 } 890 // p > 0 891 892 // round 893 var r Float 894 r.prec = uint32(p) 895 r.Set(x) 896 e = r.exp - 1 897 898 // Rounding may have caused r to overflow to ±Inf 899 // (rounding never causes underflows to 0). 900 // If the exponent is too large, also overflow to ±Inf. 901 if r.form == inf || e > emax { 902 // overflow 903 if x.neg { 904 return float32(math.Inf(-1)), Below 905 } 906 return float32(math.Inf(+1)), Above 907 } 908 // e <= emax 909 910 // Determine sign, biased exponent, and mantissa. 911 var sign, bexp, mant uint32 912 if x.neg { 913 sign = 1 << (fbits - 1) 914 } 915 916 // Rounding may have caused a denormal number to 917 // become normal. Check again. 918 if e < emin { 919 // denormal number: recompute precision 920 // Since rounding may have at best increased precision 921 // and we have eliminated p <= 0 early, we know p > 0. 922 // bexp == 0 for denormals 923 p = mbits + 1 - emin + int(e) 924 mant = msb32(r.mant) >> uint(fbits-p) 925 } else { 926 // normal number: emin <= e <= emax 927 bexp = uint32(e+bias) << mbits 928 mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 929 } 930 931 return math.Float32frombits(sign | bexp | mant), r.acc 932 933 case zero: 934 if x.neg { 935 var z float32 936 return -z, Exact 937 } 938 return 0.0, Exact 939 940 case inf: 941 if x.neg { 942 return float32(math.Inf(-1)), Exact 943 } 944 return float32(math.Inf(+1)), Exact 945 } 946 947 panic("unreachable") 948 } 949 950 // Float64 returns the float64 value nearest to x. If x is too small to be 951 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 952 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 953 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 954 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 955 func (x *Float) Float64() (float64, Accuracy) { 956 if debugFloat { 957 x.validate() 958 } 959 960 switch x.form { 961 case finite: 962 // 0 < |x| < +Inf 963 964 const ( 965 fbits = 64 // float size 966 mbits = 52 // mantissa size (excluding implicit msb) 967 ebits = fbits - mbits - 1 // 11 exponent size 968 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 969 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 970 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 971 emax = bias // 1023 largest unbiased exponent (normal) 972 ) 973 974 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. 975 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 976 977 // Compute precision p for float64 mantissa. 978 // If the exponent is too small, we have a denormal number before 979 // rounding and fewer than p mantissa bits of precision available 980 // (the exponent remains fixed but the mantissa gets shifted right). 981 p := mbits + 1 // precision of normal float 982 if e < emin { 983 // recompute precision 984 p = mbits + 1 - emin + int(e) 985 // If p == 0, the mantissa of x is shifted so much to the right 986 // that its msb falls immediately to the right of the float64 987 // mantissa space. In other words, if the smallest denormal is 988 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 989 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 990 // If m == 0.5, it is rounded down to even, i.e., 0.0. 991 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 992 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 993 // underflow to ±0 994 if x.neg { 995 var z float64 996 return -z, Above 997 } 998 return 0.0, Below 999 } 1000 // otherwise, round up 1001 // We handle p == 0 explicitly because it's easy and because 1002 // Float.round doesn't support rounding to 0 bits of precision. 1003 if p == 0 { 1004 if x.neg { 1005 return -math.SmallestNonzeroFloat64, Below 1006 } 1007 return math.SmallestNonzeroFloat64, Above 1008 } 1009 } 1010 // p > 0 1011 1012 // round 1013 var r Float 1014 r.prec = uint32(p) 1015 r.Set(x) 1016 e = r.exp - 1 1017 1018 // Rounding may have caused r to overflow to ±Inf 1019 // (rounding never causes underflows to 0). 1020 // If the exponent is too large, also overflow to ±Inf. 1021 if r.form == inf || e > emax { 1022 // overflow 1023 if x.neg { 1024 return math.Inf(-1), Below 1025 } 1026 return math.Inf(+1), Above 1027 } 1028 // e <= emax 1029 1030 // Determine sign, biased exponent, and mantissa. 1031 var sign, bexp, mant uint64 1032 if x.neg { 1033 sign = 1 << (fbits - 1) 1034 } 1035 1036 // Rounding may have caused a denormal number to 1037 // become normal. Check again. 1038 if e < emin { 1039 // denormal number: recompute precision 1040 // Since rounding may have at best increased precision 1041 // and we have eliminated p <= 0 early, we know p > 0. 1042 // bexp == 0 for denormals 1043 p = mbits + 1 - emin + int(e) 1044 mant = msb64(r.mant) >> uint(fbits-p) 1045 } else { 1046 // normal number: emin <= e <= emax 1047 bexp = uint64(e+bias) << mbits 1048 mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1049 } 1050 1051 return math.Float64frombits(sign | bexp | mant), r.acc 1052 1053 case zero: 1054 if x.neg { 1055 var z float64 1056 return -z, Exact 1057 } 1058 return 0.0, Exact 1059 1060 case inf: 1061 if x.neg { 1062 return math.Inf(-1), Exact 1063 } 1064 return math.Inf(+1), Exact 1065 } 1066 1067 panic("unreachable") 1068 } 1069 1070 // Int returns the result of truncating x towards zero; 1071 // or nil if x is an infinity. 1072 // The result is Exact if x.IsInt(); otherwise it is Below 1073 // for x > 0, and Above for x < 0. 1074 // If a non-nil *Int argument z is provided, Int stores 1075 // the result in z instead of allocating a new Int. 1076 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1077 if debugFloat { 1078 x.validate() 1079 } 1080 1081 if z == nil && x.form <= finite { 1082 z = new(Int) 1083 } 1084 1085 switch x.form { 1086 case finite: 1087 // 0 < |x| < +Inf 1088 acc := makeAcc(x.neg) 1089 if x.exp <= 0 { 1090 // 0 < |x| < 1 1091 return z.SetInt64(0), acc 1092 } 1093 // x.exp > 0 1094 1095 // 1 <= |x| < +Inf 1096 // determine minimum required precision for x 1097 allBits := uint(len(x.mant)) * _W 1098 exp := uint(x.exp) 1099 if x.MinPrec() <= exp { 1100 acc = Exact 1101 } 1102 // shift mantissa as needed 1103 if z == nil { 1104 z = new(Int) 1105 } 1106 z.neg = x.neg 1107 switch { 1108 case exp > allBits: 1109 z.abs = z.abs.shl(x.mant, exp-allBits) 1110 default: 1111 z.abs = z.abs.set(x.mant) 1112 case exp < allBits: 1113 z.abs = z.abs.shr(x.mant, allBits-exp) 1114 } 1115 return z, acc 1116 1117 case zero: 1118 return z.SetInt64(0), Exact 1119 1120 case inf: 1121 return nil, makeAcc(x.neg) 1122 } 1123 1124 panic("unreachable") 1125 } 1126 1127 // Rat returns the rational number corresponding to x; 1128 // or nil if x is an infinity. 1129 // The result is Exact if x is not an Inf. 1130 // If a non-nil *Rat argument z is provided, Rat stores 1131 // the result in z instead of allocating a new Rat. 1132 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1133 if debugFloat { 1134 x.validate() 1135 } 1136 1137 if z == nil && x.form <= finite { 1138 z = new(Rat) 1139 } 1140 1141 switch x.form { 1142 case finite: 1143 // 0 < |x| < +Inf 1144 allBits := int32(len(x.mant)) * _W 1145 // build up numerator and denominator 1146 z.a.neg = x.neg 1147 switch { 1148 case x.exp > allBits: 1149 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1150 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1151 // z already in normal form 1152 default: 1153 z.a.abs = z.a.abs.set(x.mant) 1154 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1155 // z already in normal form 1156 case x.exp < allBits: 1157 z.a.abs = z.a.abs.set(x.mant) 1158 t := z.b.abs.setUint64(1) 1159 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1160 z.norm() 1161 } 1162 return z, Exact 1163 1164 case zero: 1165 return z.SetInt64(0), Exact 1166 1167 case inf: 1168 return nil, makeAcc(x.neg) 1169 } 1170 1171 panic("unreachable") 1172 } 1173 1174 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1175 // and returns z. 1176 func (z *Float) Abs(x *Float) *Float { 1177 z.Set(x) 1178 z.neg = false 1179 return z 1180 } 1181 1182 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1183 // and returns z. 1184 func (z *Float) Neg(x *Float) *Float { 1185 z.Set(x) 1186 z.neg = !z.neg 1187 return z 1188 } 1189 1190 func validateBinaryOperands(x, y *Float) { 1191 if !debugFloat { 1192 // avoid performance bugs 1193 panic("validateBinaryOperands called but debugFloat is not set") 1194 } 1195 if len(x.mant) == 0 { 1196 panic("empty mantissa for x") 1197 } 1198 if len(y.mant) == 0 { 1199 panic("empty mantissa for y") 1200 } 1201 } 1202 1203 // z = x + y, ignoring signs of x and y for the addition 1204 // but using the sign of z for rounding the result. 1205 // x and y must have a non-empty mantissa and valid exponent. 1206 func (z *Float) uadd(x, y *Float) { 1207 // Note: This implementation requires 2 shifts most of the 1208 // time. It is also inefficient if exponents or precisions 1209 // differ by wide margins. The following article describes 1210 // an efficient (but much more complicated) implementation 1211 // compatible with the internal representation used here: 1212 // 1213 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1214 // Point Addition With Exact Rounding (as in the MPFR Library)" 1215 // http://www.vinc17.net/research/papers/rnc6.pdf 1216 1217 if debugFloat { 1218 validateBinaryOperands(x, y) 1219 } 1220 1221 // compute exponents ex, ey for mantissa with "binary point" 1222 // on the right (mantissa.0) - use int64 to avoid overflow 1223 ex := int64(x.exp) - int64(len(x.mant))*_W 1224 ey := int64(y.exp) - int64(len(y.mant))*_W 1225 1226 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1227 1228 // TODO(gri) having a combined add-and-shift primitive 1229 // could make this code significantly faster 1230 switch { 1231 case ex < ey: 1232 if al { 1233 t := nat(nil).shl(y.mant, uint(ey-ex)) 1234 z.mant = z.mant.add(x.mant, t) 1235 } else { 1236 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1237 z.mant = z.mant.add(x.mant, z.mant) 1238 } 1239 default: 1240 // ex == ey, no shift needed 1241 z.mant = z.mant.add(x.mant, y.mant) 1242 case ex > ey: 1243 if al { 1244 t := nat(nil).shl(x.mant, uint(ex-ey)) 1245 z.mant = z.mant.add(t, y.mant) 1246 } else { 1247 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1248 z.mant = z.mant.add(z.mant, y.mant) 1249 } 1250 ex = ey 1251 } 1252 // len(z.mant) > 0 1253 1254 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1255 } 1256 1257 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1258 // but using the sign of z for rounding the result. 1259 // x and y must have a non-empty mantissa and valid exponent. 1260 func (z *Float) usub(x, y *Float) { 1261 // This code is symmetric to uadd. 1262 // We have not factored the common code out because 1263 // eventually uadd (and usub) should be optimized 1264 // by special-casing, and the code will diverge. 1265 1266 if debugFloat { 1267 validateBinaryOperands(x, y) 1268 } 1269 1270 ex := int64(x.exp) - int64(len(x.mant))*_W 1271 ey := int64(y.exp) - int64(len(y.mant))*_W 1272 1273 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1274 1275 switch { 1276 case ex < ey: 1277 if al { 1278 t := nat(nil).shl(y.mant, uint(ey-ex)) 1279 z.mant = t.sub(x.mant, t) 1280 } else { 1281 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1282 z.mant = z.mant.sub(x.mant, z.mant) 1283 } 1284 default: 1285 // ex == ey, no shift needed 1286 z.mant = z.mant.sub(x.mant, y.mant) 1287 case ex > ey: 1288 if al { 1289 t := nat(nil).shl(x.mant, uint(ex-ey)) 1290 z.mant = t.sub(t, y.mant) 1291 } else { 1292 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1293 z.mant = z.mant.sub(z.mant, y.mant) 1294 } 1295 ex = ey 1296 } 1297 1298 // operands may have canceled each other out 1299 if len(z.mant) == 0 { 1300 z.acc = Exact 1301 z.form = zero 1302 z.neg = false 1303 return 1304 } 1305 // len(z.mant) > 0 1306 1307 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1308 } 1309 1310 // z = x * y, ignoring signs of x and y for the multiplication 1311 // but using the sign of z for rounding the result. 1312 // x and y must have a non-empty mantissa and valid exponent. 1313 func (z *Float) umul(x, y *Float) { 1314 if debugFloat { 1315 validateBinaryOperands(x, y) 1316 } 1317 1318 // Note: This is doing too much work if the precision 1319 // of z is less than the sum of the precisions of x 1320 // and y which is often the case (e.g., if all floats 1321 // have the same precision). 1322 // TODO(gri) Optimize this for the common case. 1323 1324 e := int64(x.exp) + int64(y.exp) 1325 if x == y { 1326 z.mant = z.mant.sqr(x.mant) 1327 } else { 1328 z.mant = z.mant.mul(x.mant, y.mant) 1329 } 1330 z.setExpAndRound(e-fnorm(z.mant), 0) 1331 } 1332 1333 // z = x / y, ignoring signs of x and y for the division 1334 // but using the sign of z for rounding the result. 1335 // x and y must have a non-empty mantissa and valid exponent. 1336 func (z *Float) uquo(x, y *Float) { 1337 if debugFloat { 1338 validateBinaryOperands(x, y) 1339 } 1340 1341 // mantissa length in words for desired result precision + 1 1342 // (at least one extra bit so we get the rounding bit after 1343 // the division) 1344 n := int(z.prec/_W) + 1 1345 1346 // compute adjusted x.mant such that we get enough result precision 1347 xadj := x.mant 1348 if d := n - len(x.mant) + len(y.mant); d > 0 { 1349 // d extra words needed => add d "0 digits" to x 1350 xadj = make(nat, len(x.mant)+d) 1351 copy(xadj[d:], x.mant) 1352 } 1353 // TODO(gri): If we have too many digits (d < 0), we should be able 1354 // to shorten x for faster division. But we must be extra careful 1355 // with rounding in that case. 1356 1357 // Compute d before division since there may be aliasing of x.mant 1358 // (via xadj) or y.mant with z.mant. 1359 d := len(xadj) - len(y.mant) 1360 1361 // divide 1362 var r nat 1363 z.mant, r = z.mant.div(nil, xadj, y.mant) 1364 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1365 1366 // The result is long enough to include (at least) the rounding bit. 1367 // If there's a non-zero remainder, the corresponding fractional part 1368 // (if it were computed), would have a non-zero sticky bit (if it were 1369 // zero, it couldn't have a non-zero remainder). 1370 var sbit uint 1371 if len(r) > 0 { 1372 sbit = 1 1373 } 1374 1375 z.setExpAndRound(e-fnorm(z.mant), sbit) 1376 } 1377 1378 // ucmp returns -1, 0, or +1, depending on whether 1379 // |x| < |y|, |x| == |y|, or |x| > |y|. 1380 // x and y must have a non-empty mantissa and valid exponent. 1381 func (x *Float) ucmp(y *Float) int { 1382 if debugFloat { 1383 validateBinaryOperands(x, y) 1384 } 1385 1386 switch { 1387 case x.exp < y.exp: 1388 return -1 1389 case x.exp > y.exp: 1390 return +1 1391 } 1392 // x.exp == y.exp 1393 1394 // compare mantissas 1395 i := len(x.mant) 1396 j := len(y.mant) 1397 for i > 0 || j > 0 { 1398 var xm, ym Word 1399 if i > 0 { 1400 i-- 1401 xm = x.mant[i] 1402 } 1403 if j > 0 { 1404 j-- 1405 ym = y.mant[j] 1406 } 1407 switch { 1408 case xm < ym: 1409 return -1 1410 case xm > ym: 1411 return +1 1412 } 1413 } 1414 1415 return 0 1416 } 1417 1418 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1419 // 1420 // When neither the inputs nor result are NaN, the sign of a product or 1421 // quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1422 // or of a difference x−y regarded as a sum x+(−y), differs from at most 1423 // one of the addends’ signs; and the sign of the result of conversions, 1424 // the quantize operation, the roundToIntegral operations, and the 1425 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1426 // These rules shall apply even when operands or results are zero or infinite. 1427 // 1428 // When the sum of two operands with opposite signs (or the difference of 1429 // two operands with like signs) is exactly zero, the sign of that sum (or 1430 // difference) shall be +0 in all rounding-direction attributes except 1431 // roundTowardNegative; under that attribute, the sign of an exact zero 1432 // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1433 // sign as x even when x is zero. 1434 // 1435 // See also: https://play.golang.org/p/RtH3UCt5IH 1436 1437 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1438 // it is changed to the larger of x's or y's precision before the operation. 1439 // Rounding is performed according to z's precision and rounding mode; and 1440 // z's accuracy reports the result error relative to the exact (not rounded) 1441 // result. Add panics with ErrNaN if x and y are infinities with opposite 1442 // signs. The value of z is undefined in that case. 1443 func (z *Float) Add(x, y *Float) *Float { 1444 if debugFloat { 1445 x.validate() 1446 y.validate() 1447 } 1448 1449 if z.prec == 0 { 1450 z.prec = umax32(x.prec, y.prec) 1451 } 1452 1453 if x.form == finite && y.form == finite { 1454 // x + y (common case) 1455 1456 // Below we set z.neg = x.neg, and when z aliases y this will 1457 // change the y operand's sign. This is fine, because if an 1458 // operand aliases the receiver it'll be overwritten, but we still 1459 // want the original x.neg and y.neg values when we evaluate 1460 // x.neg != y.neg, so we need to save y.neg before setting z.neg. 1461 yneg := y.neg 1462 1463 z.neg = x.neg 1464 if x.neg == yneg { 1465 // x + y == x + y 1466 // (-x) + (-y) == -(x + y) 1467 z.uadd(x, y) 1468 } else { 1469 // x + (-y) == x - y == -(y - x) 1470 // (-x) + y == y - x == -(x - y) 1471 if x.ucmp(y) > 0 { 1472 z.usub(x, y) 1473 } else { 1474 z.neg = !z.neg 1475 z.usub(y, x) 1476 } 1477 } 1478 if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { 1479 z.neg = true 1480 } 1481 return z 1482 } 1483 1484 if x.form == inf && y.form == inf && x.neg != y.neg { 1485 // +Inf + -Inf 1486 // -Inf + +Inf 1487 // value of z is undefined but make sure it's valid 1488 z.acc = Exact 1489 z.form = zero 1490 z.neg = false 1491 panic(ErrNaN{"addition of infinities with opposite signs"}) 1492 } 1493 1494 if x.form == zero && y.form == zero { 1495 // ±0 + ±0 1496 z.acc = Exact 1497 z.form = zero 1498 z.neg = x.neg && y.neg // -0 + -0 == -0 1499 return z 1500 } 1501 1502 if x.form == inf || y.form == zero { 1503 // ±Inf + y 1504 // x + ±0 1505 return z.Set(x) 1506 } 1507 1508 // ±0 + y 1509 // x + ±Inf 1510 return z.Set(y) 1511 } 1512 1513 // Sub sets z to the rounded difference x-y and returns z. 1514 // Precision, rounding, and accuracy reporting are as for Add. 1515 // Sub panics with ErrNaN if x and y are infinities with equal 1516 // signs. The value of z is undefined in that case. 1517 func (z *Float) Sub(x, y *Float) *Float { 1518 if debugFloat { 1519 x.validate() 1520 y.validate() 1521 } 1522 1523 if z.prec == 0 { 1524 z.prec = umax32(x.prec, y.prec) 1525 } 1526 1527 if x.form == finite && y.form == finite { 1528 // x - y (common case) 1529 yneg := y.neg 1530 z.neg = x.neg 1531 if x.neg != yneg { 1532 // x - (-y) == x + y 1533 // (-x) - y == -(x + y) 1534 z.uadd(x, y) 1535 } else { 1536 // x - y == x - y == -(y - x) 1537 // (-x) - (-y) == y - x == -(x - y) 1538 if x.ucmp(y) > 0 { 1539 z.usub(x, y) 1540 } else { 1541 z.neg = !z.neg 1542 z.usub(y, x) 1543 } 1544 } 1545 if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { 1546 z.neg = true 1547 } 1548 return z 1549 } 1550 1551 if x.form == inf && y.form == inf && x.neg == y.neg { 1552 // +Inf - +Inf 1553 // -Inf - -Inf 1554 // value of z is undefined but make sure it's valid 1555 z.acc = Exact 1556 z.form = zero 1557 z.neg = false 1558 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1559 } 1560 1561 if x.form == zero && y.form == zero { 1562 // ±0 - ±0 1563 z.acc = Exact 1564 z.form = zero 1565 z.neg = x.neg && !y.neg // -0 - +0 == -0 1566 return z 1567 } 1568 1569 if x.form == inf || y.form == zero { 1570 // ±Inf - y 1571 // x - ±0 1572 return z.Set(x) 1573 } 1574 1575 // ±0 - y 1576 // x - ±Inf 1577 return z.Neg(y) 1578 } 1579 1580 // Mul sets z to the rounded product x*y and returns z. 1581 // Precision, rounding, and accuracy reporting are as for Add. 1582 // Mul panics with ErrNaN if one operand is zero and the other 1583 // operand an infinity. The value of z is undefined in that case. 1584 func (z *Float) Mul(x, y *Float) *Float { 1585 if debugFloat { 1586 x.validate() 1587 y.validate() 1588 } 1589 1590 if z.prec == 0 { 1591 z.prec = umax32(x.prec, y.prec) 1592 } 1593 1594 z.neg = x.neg != y.neg 1595 1596 if x.form == finite && y.form == finite { 1597 // x * y (common case) 1598 z.umul(x, y) 1599 return z 1600 } 1601 1602 z.acc = Exact 1603 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1604 // ±0 * ±Inf 1605 // ±Inf * ±0 1606 // value of z is undefined but make sure it's valid 1607 z.form = zero 1608 z.neg = false 1609 panic(ErrNaN{"multiplication of zero with infinity"}) 1610 } 1611 1612 if x.form == inf || y.form == inf { 1613 // ±Inf * y 1614 // x * ±Inf 1615 z.form = inf 1616 return z 1617 } 1618 1619 // ±0 * y 1620 // x * ±0 1621 z.form = zero 1622 return z 1623 } 1624 1625 // Quo sets z to the rounded quotient x/y and returns z. 1626 // Precision, rounding, and accuracy reporting are as for Add. 1627 // Quo panics with ErrNaN if both operands are zero or infinities. 1628 // The value of z is undefined in that case. 1629 func (z *Float) Quo(x, y *Float) *Float { 1630 if debugFloat { 1631 x.validate() 1632 y.validate() 1633 } 1634 1635 if z.prec == 0 { 1636 z.prec = umax32(x.prec, y.prec) 1637 } 1638 1639 z.neg = x.neg != y.neg 1640 1641 if x.form == finite && y.form == finite { 1642 // x / y (common case) 1643 z.uquo(x, y) 1644 return z 1645 } 1646 1647 z.acc = Exact 1648 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1649 // ±0 / ±0 1650 // ±Inf / ±Inf 1651 // value of z is undefined but make sure it's valid 1652 z.form = zero 1653 z.neg = false 1654 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1655 } 1656 1657 if x.form == zero || y.form == inf { 1658 // ±0 / y 1659 // x / ±Inf 1660 z.form = zero 1661 return z 1662 } 1663 1664 // x / ±0 1665 // ±Inf / y 1666 z.form = inf 1667 return z 1668 } 1669 1670 // Cmp compares x and y and returns: 1671 // 1672 // -1 if x < y 1673 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1674 // +1 if x > y 1675 // 1676 func (x *Float) Cmp(y *Float) int { 1677 if debugFloat { 1678 x.validate() 1679 y.validate() 1680 } 1681 1682 mx := x.ord() 1683 my := y.ord() 1684 switch { 1685 case mx < my: 1686 return -1 1687 case mx > my: 1688 return +1 1689 } 1690 // mx == my 1691 1692 // only if |mx| == 1 we have to compare the mantissae 1693 switch mx { 1694 case -1: 1695 return y.ucmp(x) 1696 case +1: 1697 return x.ucmp(y) 1698 } 1699 1700 return 0 1701 } 1702 1703 // ord classifies x and returns: 1704 // 1705 // -2 if -Inf == x 1706 // -1 if -Inf < x < 0 1707 // 0 if x == 0 (signed or unsigned) 1708 // +1 if 0 < x < +Inf 1709 // +2 if x == +Inf 1710 // 1711 func (x *Float) ord() int { 1712 var m int 1713 switch x.form { 1714 case finite: 1715 m = 1 1716 case zero: 1717 return 0 1718 case inf: 1719 m = 2 1720 } 1721 if x.neg { 1722 m = -m 1723 } 1724 return m 1725 } 1726 1727 func umax32(x, y uint32) uint32 { 1728 if x > y { 1729 return x 1730 } 1731 return y 1732 } 1733