Source file
src/testing/benchmark.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package testing

import (
	"flag"
	"fmt"
	"internal/race"
	"internal/sysinfo"
	"io"
	"math"
	"os"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unicode"
)

func initBenchmarkFlags() {
	matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
	benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
	flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d`")
}

var (
	matchBenchmarks *string
	benchmarkMemory *bool

	benchTime = durationOrCountFlag{d: 1 * time.Second}
)

// durationOrCountFlag holds the value of the -test.benchtime flag, which is
// either a time.Duration (e.g. "10s") or an iteration count (e.g. "100x").
type durationOrCountFlag struct {
	d         time.Duration
	n         int
	allowZero bool
}

func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

func (f *durationOrCountFlag) Set(s string) error {
	if strings.HasSuffix(s, "x") {
		n, err := strconv.ParseInt(s[:len(s)-1], 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		*f = durationOrCountFlag{n: int(n)}
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	*f = durationOrCountFlag{d: d}
	return nil
}
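// Illustrative sketch (not part of the original file): the flag accepts either
// form, so both of these command lines are valid for a package that defines
// benchmarks:
//
//	go test -bench=. -benchtime=2s    // run each benchmark for ~2 seconds
//	go test -bench=. -benchtime=100x  // run each benchmark exactly 100 times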

// benchmarkLock ensures only one benchmark runs at a time.
var benchmarkLock sync.Mutex

// memStats is reused by every benchmark for measuring memory.
var memStats runtime.MemStats

// InternalBenchmark is an internal type but exported because it is
// cross-package; it is part of the implementation of the "go test" command.
type InternalBenchmark struct {
	Name string
	F    func(b *B)
}

// B is a type passed to Benchmark functions to manage benchmark
// timing and to specify the number of iterations to run.
//
// A benchmark ends when its Benchmark function returns or calls any of the
// methods FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must
// be called only from the goroutine running the Benchmark function.
// The other reporting methods, such as the variants of Log and Error,
// may be called simultaneously from multiple goroutines.
//
// Like in tests, benchmark logs are accumulated during execution
// and dumped to standard output when done. Unlike in tests, benchmark logs
// are always printed, so as not to hide output whose existence may be
// affecting benchmark results.
type B struct {
	common
	importPath       string // import path of the package containing the benchmark
	context          *benchContext
	N                int
	previousN        int           // number of iterations in the previous run
	previousDuration time.Duration // total duration of the previous run
	benchFunc        func(b *B)
	benchTime        durationOrCountFlag
	bytes            int64
	missingBytes     bool // one of the subbenchmarks does not have bytes set
	timerOn          bool
	showAllocResult  bool
	result           BenchmarkResult
	parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines

	// The initial states of memStats.Mallocs and memStats.TotalAlloc.
	startAllocs uint64
	startBytes  uint64

	// The net totals of this benchmark after being run.
	netAllocs uint64
	netBytes  uint64

	// Extra metrics collected by ReportMetric.
	extra map[string]float64
}
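// Illustrative sketch (not part of the original file): a typical benchmark
// drives its workload from b.N, which the framework adjusts between runs
// (the benchmark name and workload below are hypothetical):
//
//	func BenchmarkHello(b *testing.B) {
//		for i := 0; i < b.N; i++ {
//			fmt.Sprintf("hello %d", i)
//		}
//	}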

// StartTimer starts timing a test. This function is called automatically
// before a benchmark starts, but it can also be used to resume timing after
// a call to StopTimer.
func (b *B) StartTimer() {
	if !b.timerOn {
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = time.Now()
		b.timerOn = true
	}
}

// StopTimer stops timing a test. This can be used to pause the timer
// while performing complex initialization that you don't want to measure.
func (b *B) StopTimer() {
	if b.timerOn {
		b.duration += time.Since(b.start)
		runtime.ReadMemStats(&memStats)
		b.netAllocs += memStats.Mallocs - b.startAllocs
		b.netBytes += memStats.TotalAlloc - b.startBytes
		b.timerOn = false
	}
}

// ResetTimer zeroes the elapsed benchmark time and memory allocation counters
// and deletes user-reported metrics.
// It does not affect whether the timer is running.
func (b *B) ResetTimer() {
	if b.extra == nil {
		// Allocate the extra map before reading memory stats.
		// Pre-size it to make more allocation unlikely.
		b.extra = make(map[string]float64, 16)
	} else {
		for k := range b.extra {
			delete(b.extra, k)
		}
	}
	if b.timerOn {
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = time.Now()
	}
	b.duration = 0
	b.netAllocs = 0
	b.netBytes = 0
}

// SetBytes records the number of bytes processed in a single operation.
// If this is called, the benchmark will report ns/op and MB/s.
func (b *B) SetBytes(n int64) { b.bytes = n }

// ReportAllocs enables malloc statistics for this benchmark.
// It is equivalent to setting -test.benchmem, but it only affects the
// benchmark function that calls ReportAllocs.
func (b *B) ReportAllocs() {
	b.showAllocResult = true
}
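// Illustrative sketch (not part of the original file): excluding setup from
// measurement and enabling per-operation allocation reporting (the workload
// and helper names below are hypothetical):
//
//	func BenchmarkProcess(b *testing.B) {
//		data := make([]byte, 1<<20) // expensive setup
//		b.SetBytes(int64(len(data)))
//		b.ReportAllocs()
//		b.ResetTimer() // don't measure the setup above
//		for i := 0; i < b.N; i++ {
//			process(data) // hypothetical function under test
//		}
//	}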

// runN runs a single benchmark for the specified number of iterations.
func (b *B) runN(n int) {
	benchmarkLock.Lock()
	defer benchmarkLock.Unlock()
	defer b.runCleanup(normalPanic)
	// Try to get a comparable environment across different
	// machines with highly variable memory footprints.
	runtime.GC()
	b.raceErrors = -race.Errors()
	b.N = n
	b.parallelism = 1
	b.ResetTimer()
	b.StartTimer()
	b.benchFunc(b)
	b.StopTimer()
	b.previousN = n
	b.previousDuration = b.duration
	b.raceErrors += race.Errors()
	if b.raceErrors > 0 {
		b.Errorf("race detected during execution of benchmark")
	}
}

func min(x, y int64) int64 {
	if x > y {
		return y
	}
	return x
}

func max(x, y int64) int64 {
	if x < y {
		return y
	}
	return x
}

// run1 runs the first iteration of benchFunc. It reports whether more
// iterations of this benchmark should be run.
func (b *B) run1() bool {
	if ctx := b.context; ctx != nil {
		// Extend maxLen, if needed.
		if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
			ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
		}
	}
	go func() {
		// Signal that we're done whether we return normally
		// or by FailNow's runtime.Goexit.
		defer func() {
			b.signal <- true
		}()

		b.runN(1)
	}()
	<-b.signal
	if b.failed {
		fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output)
		return false
	}
	// Only print the output if we know we are not going to proceed.
	// Otherwise it is printed in processBench.
	b.mu.RLock()
	finished := b.finished
	b.mu.RUnlock()
	if atomic.LoadInt32(&b.hasSub) != 0 || finished {
		tag := "BENCH"
		if b.skipped {
			tag = "SKIP"
		}
		if b.chatty != nil && (len(b.output) > 0 || finished) {
			b.trimOutput()
			fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output)
		}
		return false
	}
	return true
}

var labelsOnce sync.Once

// run executes the benchmark in a separate goroutine, including all of its
// subbenchmarks. b must not have subbenchmarks.
func (b *B) run() {
	labelsOnce.Do(func() {
		fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
		fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
		if b.importPath != "" {
			fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
		}
		if cpu := sysinfo.CPU.Name(); cpu != "" {
			fmt.Fprintf(b.w, "cpu: %s\n", cpu)
		}
	})
	if b.context != nil {
		// Running go test --test.bench
		b.context.processBench(b)
	} else {
		// Running func Benchmark.
		b.doBench()
	}
}

func (b *B) doBench() BenchmarkResult {
	go b.launch()
	<-b.signal
	return b.result
}

// launch launches the benchmark function. It gradually increases the number
// of benchmark iterations until the benchmark runs for the requested benchtime.
// launch is run by the doBench function as a separate goroutine.
// run1 must have been called on b.
func (b *B) launch() {
	// Signal that we're done whether we return normally
	// or by FailNow's runtime.Goexit.
	defer func() {
		b.signal <- true
	}()

	// Run the benchmark for at least the specified amount of time.
	if b.benchTime.n > 0 {
		// We already ran a single iteration in run1.
		// If -benchtime=1x was requested, use that result.
		if b.benchTime.n > 1 {
			b.runN(b.benchTime.n)
		}
	} else {
		d := b.benchTime.d
		for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
			last := n
			// Predict required iterations.
			goalns := d.Nanoseconds()
			prevIters := int64(b.N)
			prevns := b.duration.Nanoseconds()
			if prevns <= 0 {
				// Round up, to avoid division by zero.
				prevns = 1
			}
			// Order of operations matters.
			// For very fast benchmarks, prevIters ~= prevns.
			// If you divide first, you get 0 or 1,
			// which can hide an order of magnitude in execution time.
			// So multiply first, then divide.
			n = goalns * prevIters / prevns
			// Run more iterations than we think we'll need (1.2x).
			n += n / 5
			// Don't grow too fast in case we had timing errors previously.
			n = min(n, 100*last)
			// Be sure to run at least one more than last time.
			n = max(n, last+1)
			// Don't run more than 1e9 times.
			n = min(n, 1e9)
			b.runN(int(n))
		}
	}
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}
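// Worked example (illustrative, numbers hypothetical): if the previous run did
// prevIters = 100 iterations in prevns = 200,000ns and the goal is
// goalns = 1e9ns (-benchtime=1s), the predicted count is
// 1e9 * 100 / 200000 = 500,000, grown by 20% to 600,000, then clamped to
// at most 100*last and at least last+1 before the next runN call.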

// ReportMetric adds "n unit" to the reported benchmark results.
// If the metric is per-iteration, the caller should divide by b.N,
// and by convention units should end in "/op".
// ReportMetric overrides any previously reported value for the same unit.
// ReportMetric panics if unit is the empty string or if unit contains
// any whitespace.
// If unit is a unit normally reported by the benchmark framework itself
// (such as "allocs/op"), ReportMetric will override that metric.
func (b *B) ReportMetric(n float64, unit string) {
	if unit == "" {
		panic("metric unit must not be empty")
	}
	if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
		panic("metric unit must not contain whitespace")
	}
	b.extra[unit] = n
}
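// Illustrative sketch (not part of the original file): reporting a custom
// per-iteration metric from a benchmark (the counter and workload below are
// hypothetical):
//
//	func BenchmarkCompare(b *testing.B) {
//		var compares int64
//		for i := 0; i < b.N; i++ {
//			compares += doWork() // hypothetical function returning a count
//		}
//		b.ReportMetric(float64(compares)/float64(b.N), "compares/op")
//	}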

// The results of a benchmark run.
type BenchmarkResult struct {
	N         int           // The number of iterations.
	T         time.Duration // The total time taken.
	Bytes     int64         // Bytes processed in one iteration.
	MemAllocs uint64        // The total number of memory allocations.
	MemBytes  uint64        // The total number of bytes allocated.

	// Extra records additional metrics reported by ReportMetric.
	Extra map[string]float64
}

// NsPerOp returns the "ns/op" metric.
func (r BenchmarkResult) NsPerOp() int64 {
	if v, ok := r.Extra["ns/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return r.T.Nanoseconds() / int64(r.N)
}

// mbPerSec returns the "MB/s" metric.
func (r BenchmarkResult) mbPerSec() float64 {
	if v, ok := r.Extra["MB/s"]; ok {
		return v
	}
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns the "allocs/op" metric,
// which is calculated as r.MemAllocs / r.N.
func (r BenchmarkResult) AllocsPerOp() int64 {
	if v, ok := r.Extra["allocs/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemAllocs) / int64(r.N)
}

// AllocedBytesPerOp returns the "B/op" metric,
// which is calculated as r.MemBytes / r.N.
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if v, ok := r.Extra["B/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String returns a summary of the benchmark results.
// It follows the benchmark result line format from
// https://golang.org/design/14313-benchmark-format, not including the
// benchmark name.
// Extra metrics override built-in metrics of the same name.
// String does not include allocs/op or B/op, since those are reported
// by MemString.
func (r BenchmarkResult) String() string {
	buf := new(strings.Builder)
	fmt.Fprintf(buf, "%8d", r.N)

	// Get ns/op as a float.
	ns, ok := r.Extra["ns/op"]
	if !ok {
		ns = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if ns != 0 {
		buf.WriteByte('\t')
		prettyPrint(buf, ns, "ns/op")
	}

	if mbs := r.mbPerSec(); mbs != 0 {
		fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
	}

	// Print extra metrics that aren't represented in the standard
	// metrics.
	var extraKeys []string
	for k := range r.Extra {
		switch k {
		case "ns/op", "MB/s", "B/op", "allocs/op":
			// Built-in metrics reported elsewhere.
			continue
		}
		extraKeys = append(extraKeys, k)
	}
	sort.Strings(extraKeys)
	for _, k := range extraKeys {
		buf.WriteByte('\t')
		prettyPrint(buf, r.Extra[k], k)
	}
	return buf.String()
}

func prettyPrint(w io.Writer, x float64, unit string) {
	// Print all numbers with 10 places before the decimal point
	// and small numbers with four sig figs. Field widths are
	// chosen to fit the whole part in 10 places while aligning
	// the decimal point of all fractional formats.
	var format string
	switch y := math.Abs(x); {
	case y == 0 || y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'.
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}

// benchmarkName returns the full name of the benchmark, including the procs suffix.
func benchmarkName(name string, n int) string {
	if n != 1 {
		return fmt.Sprintf("%s-%d", name, n)
	}
	return name
}

type benchContext struct {
	match *matcher

	maxLen int // The largest recorded benchmark name.
	extLen int // Maximum extension length.
}

// RunBenchmarks is an internal function but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
	runBenchmarks("", matchString, benchmarks)
}

func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
	// If no flag was specified, don't run benchmarks.
	if len(*matchBenchmarks) == 0 {
		return true
	}
	// Collect matching benchmarks and determine longest name.
	maxprocs := 1
	for _, procs := range cpuList {
		if procs > maxprocs {
			maxprocs = procs
		}
	}
	ctx := &benchContext{
		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench"),
		extLen: len(benchmarkName("", maxprocs)),
	}
	var bs []InternalBenchmark
	for _, Benchmark := range benchmarks {
		if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched {
			bs = append(bs, Benchmark)
			benchName := benchmarkName(Benchmark.Name, maxprocs)
			if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
				ctx.maxLen = l
			}
		}
	}
	main := &B{
		common: common{
			name:  "Main",
			w:     os.Stdout,
			bench: true,
		},
		importPath: importPath,
		benchFunc: func(b *B) {
			for _, Benchmark := range bs {
				b.Run(Benchmark.Name, Benchmark.F)
			}
		},
		benchTime: benchTime,
		context:   ctx,
	}
	if Verbose() {
		main.chatty = newChattyPrinter(main.w)
	}
	main.runN(1)
	return !main.failed
}

// processBench runs bench b for the configured CPU counts and prints the results.
func (ctx *benchContext) processBench(b *B) {
	for i, procs := range cpuList {
		for j := uint(0); j < *count; j++ {
			runtime.GOMAXPROCS(procs)
			benchName := benchmarkName(b.name, procs)

			// If it's chatty, we've already printed this information.
			if b.chatty == nil {
				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			}
			// Recompute the running time for all but the first iteration.
			if i > 0 || j > 0 {
				b = &B{
					common: common{
						signal: make(chan bool),
						name:   b.name,
						w:      b.w,
						chatty: b.chatty,
						bench:  true,
					},
					benchFunc: b.benchFunc,
					benchTime: b.benchTime,
				}
				b.run1()
			}
			r := b.doBench()
			if b.failed {
				// The output could be very long here, but probably isn't.
				// We print it all, regardless, because we don't want to trim the reason
				// the benchmark failed.
				fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output)
				continue
			}
			results := r.String()
			if b.chatty != nil {
				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			}
			if *benchmarkMemory || b.showAllocResult {
				results += "\t" + r.MemString()
			}
			fmt.Fprintln(b.w, results)
			// Unlike with tests, we ignore the -chatty flag and always print output for
			// benchmarks since the output generation time will skew the results.
			if len(b.output) > 0 {
				b.trimOutput()
				fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output)
			}
			if p := runtime.GOMAXPROCS(-1); p != procs {
				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
			}
		}
	}
}

// Run benchmarks f as a subbenchmark with the given name. It reports
// whether there were any failures.
//
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
	// Release the lock and acquire it on exit to ensure locks stay paired.
	atomic.StoreInt32(&b.hasSub, 1)
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	benchName, ok, partial := b.name, true, false
	if b.context != nil {
		benchName, ok, partial = b.context.match.fullName(&b.common, name)
	}
	if !ok {
		return true
	}
	var pc [maxStackLen]uintptr
	n := runtime.Callers(2, pc[:])
	sub := &B{
		common: common{
			signal:  make(chan bool),
			name:    benchName,
			parent:  &b.common,
			level:   b.level + 1,
			creator: pc[:n],
			w:       b.w,
			chatty:  b.chatty,
			bench:   true,
		},
		importPath: b.importPath,
		benchFunc:  f,
		benchTime:  b.benchTime,
		context:    b.context,
	}
	if partial {
		// Partial name match, like -bench=X/Y matching BenchmarkX.
		// Only process sub-benchmarks, if any.
		atomic.StoreInt32(&sub.hasSub, 1)
	}

	if b.chatty != nil {
		labelsOnce.Do(func() {
			fmt.Printf("goos: %s\n", runtime.GOOS)
			fmt.Printf("goarch: %s\n", runtime.GOARCH)
			if b.importPath != "" {
				fmt.Printf("pkg: %s\n", b.importPath)
			}
			if cpu := sysinfo.CPU.Name(); cpu != "" {
				fmt.Printf("cpu: %s\n", cpu)
			}
		})

		fmt.Println(benchName)
	}

	if sub.run1() {
		sub.run()
	}
	b.add(sub.result)
	return !sub.failed
}
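// Illustrative sketch (not part of the original file): table-driven
// sub-benchmarks via b.Run (the sizes and workload below are hypothetical):
//
//	func BenchmarkEncode(b *testing.B) {
//		for _, size := range []int{1 << 10, 1 << 20} {
//			b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
//				buf := make([]byte, size)
//				for i := 0; i < b.N; i++ {
//					encode(buf) // hypothetical function under test
//				}
//			})
//		}
//	}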

// add simulates running benchmarks in sequence in a single iteration. It is
// used to give some meaningful results in case func Benchmark is used in
// combination with Run.
func (b *B) add(other BenchmarkResult) {
	r := &b.result
	// The aggregated BenchmarkResults resemble running all subbenchmarks
	// in sequence in a single benchmark.
	r.N = 1
	r.T += time.Duration(other.NsPerOp())
	if other.Bytes == 0 {
		// Summing Bytes is meaningless. Output 0.
		b.missingBytes = true
		r.Bytes = 0
	}
	if !b.missingBytes {
		r.Bytes += other.Bytes
	}
	r.MemAllocs += uint64(other.AllocsPerOp())
	r.MemBytes += uint64(other.AllocedBytesPerOp())
}

// trimOutput shortens the output from a benchmark, which can be very long.
func (b *B) trimOutput() {
	// The output is likely to appear multiple times because the benchmark
	// is run multiple times, but at least it will be seen. This is not a big deal
	// because benchmarks rarely print, but just in case, keep it short.
	const maxNewlines = 10
	for nlCount, j := 0, 0; j < len(b.output); j++ {
		if b.output[j] == '\n' {
			nlCount++
			if nlCount >= maxNewlines {
				b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
				break
			}
		}
	}
}

// A PB is used by RunParallel for running parallel benchmarks.
type PB struct {
	globalN *uint64 // shared between all worker goroutines iteration counter
	grain   uint64  // acquire that many iterations from globalN at once
	cache   uint64  // local cache of acquired iterations
	bN      uint64  // total number of iterations to execute (b.N)
}

// Next reports whether there are more iterations to execute.
func (pb *PB) Next() bool {
	if pb.cache == 0 {
		n := atomic.AddUint64(pb.globalN, pb.grain)
		if n <= pb.bN {
			pb.cache = pb.grain
		} else if n < pb.bN+pb.grain {
			pb.cache = pb.bN + pb.grain - n
		} else {
			return false
		}
	}
	pb.cache--
	return true
}

// RunParallel runs a benchmark in parallel.
// It creates multiple goroutines and distributes b.N iterations among them.
// The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
// non-CPU-bound benchmarks, call SetParallelism before RunParallel.
// RunParallel is usually used with the go test -cpu flag.
//
// The body function will be run in each goroutine. It should set up any
// goroutine-local state and then iterate until pb.Next returns false.
// It should not use the StartTimer, StopTimer, or ResetTimer functions,
// because they have global effect. It should also not call Run.
func (b *B) RunParallel(body func(*PB)) {
	if b.N == 0 {
		return // Nothing to do when probing.
	}
	// Calculate grain size as number of iterations that take 100µs.
	// 100µs is enough to amortize the overhead and provide sufficient
	// dynamic load balancing.
	grain := uint64(0)
	if b.previousN > 0 && b.previousDuration > 0 {
		grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
	}
	if grain < 1 {
		grain = 1
	}
	// We expect the inner loop and function call to take at least 10ns,
	// so do not do more than 100µs/10ns=1e4 iterations.
	if grain > 1e4 {
		grain = 1e4
	}

	n := uint64(0)
	numProcs := b.parallelism * runtime.GOMAXPROCS(0)
	var wg sync.WaitGroup
	wg.Add(numProcs)
	for p := 0; p < numProcs; p++ {
		go func() {
			defer wg.Done()
			pb := &PB{
				globalN: &n,
				grain:   grain,
				bN:      uint64(b.N),
			}
			body(pb)
		}()
	}
	wg.Wait()
	if n <= uint64(b.N) && !b.Failed() {
		b.Fatal("RunParallel: body exited without pb.Next() == false")
	}
}
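// Illustrative sketch (not part of the original file): a parallel benchmark
// body drains iterations with pb.Next; each goroutine keeps its own buffer
// (the templated workload below is only an example):
//
//	func BenchmarkTemplateParallel(b *testing.B) {
//		templ := template.Must(template.New("test").Parse("Hello, {{.}}!"))
//		b.RunParallel(func(pb *testing.PB) {
//			var buf bytes.Buffer
//			for pb.Next() {
//				buf.Reset()
//				templ.Execute(&buf, "World")
//			}
//		})
//	}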

// SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
// There is usually no need to call SetParallelism for CPU-bound benchmarks.
// If p is less than 1, this call will have no effect.
func (b *B) SetParallelism(p int) {
	if p >= 1 {
		b.parallelism = p
	}
}

// Benchmark benchmarks a single function. It is useful for creating
// custom benchmarks that do not use the "go test" command.
//
// If f depends on testing flags, then Init must be used to register
// those flags before calling Benchmark and before calling flag.Parse.
//
// If f calls Run, the result will be an estimate of running all its
// subbenchmarks that don't call Run in sequence in a single benchmark.
func Benchmark(f func(b *B)) BenchmarkResult {
	b := &B{
		common: common{
			signal: make(chan bool),
			w:      discard{},
		},
		benchFunc: f,
		benchTime: benchTime,
	}
	if b.run1() {
		b.run()
	}
	return b.result
}
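// Illustrative sketch (not part of the original file): using Benchmark outside
// of "go test" and printing the result (the measured function is hypothetical):
//
//	res := testing.Benchmark(func(b *testing.B) {
//		for i := 0; i < b.N; i++ {
//			doWork() // hypothetical function under test
//		}
//	})
//	fmt.Println(res.String(), res.MemString())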

type discard struct{}

func (discard) Write(b []byte) (n int, err error) { return len(b), nil }