Source file
test/bench/go1/fasta_test.go
1
2
3
4
5 package go1
6
7 import "runtime"
8
9
10
11 var fastabytes = makefasta()
12
13 func makefasta() []byte {
14 var n int = 25e6
15 if runtime.GOARCH == "arm" || runtime.GOARCH == "mips" || runtime.GOARCH == "mips64" {
16
17
18
19
20
21 n = 25e4
22 }
23 return fasta(n)
24 }
25
26 func fasta(n int) []byte {
27 out := make(fastaBuffer, 0, 11*n)
28
29 iub := []fastaAcid{
30 {prob: 0.27, sym: 'a'},
31 {prob: 0.12, sym: 'c'},
32 {prob: 0.12, sym: 'g'},
33 {prob: 0.27, sym: 't'},
34 {prob: 0.02, sym: 'B'},
35 {prob: 0.02, sym: 'D'},
36 {prob: 0.02, sym: 'H'},
37 {prob: 0.02, sym: 'K'},
38 {prob: 0.02, sym: 'M'},
39 {prob: 0.02, sym: 'N'},
40 {prob: 0.02, sym: 'R'},
41 {prob: 0.02, sym: 'S'},
42 {prob: 0.02, sym: 'V'},
43 {prob: 0.02, sym: 'W'},
44 {prob: 0.02, sym: 'Y'},
45 }
46
47 homosapiens := []fastaAcid{
48 {prob: 0.3029549426680, sym: 'a'},
49 {prob: 0.1979883004921, sym: 'c'},
50 {prob: 0.1975473066391, sym: 'g'},
51 {prob: 0.3015094502008, sym: 't'},
52 }
53
54 alu := []byte(
55 "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
56 "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
57 "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
58 "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
59 "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
60 "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
61 "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
62
63 out.WriteString(">ONE Homo sapiens alu\n")
64 fastaRepeat(&out, alu, 2*n)
65 out.WriteString(">TWO IUB ambiguity codes\n")
66 fastaRandom(&out, iub, 3*n)
67 out.WriteString(">THREE Homo sapiens frequency\n")
68 fastaRandom(&out, homosapiens, 5*n)
69 return out
70 }
71
72 type fastaBuffer []byte
73
74 func (b *fastaBuffer) Flush() {
75 panic("flush")
76 }
77
78 func (b *fastaBuffer) WriteString(s string) {
79 p := b.NextWrite(len(s))
80 copy(p, s)
81 }
82
83 func (b *fastaBuffer) NextWrite(n int) []byte {
84 p := *b
85 if len(p)+n > cap(p) {
86 b.Flush()
87 p = *b
88 }
89 out := p[len(p) : len(p)+n]
90 *b = p[:len(p)+n]
91 return out
92 }
93
94 const fastaLine = 60
95
96 func fastaRepeat(out *fastaBuffer, alu []byte, n int) {
97 buf := append(alu, alu...)
98 off := 0
99 for n > 0 {
100 m := n
101 if m > fastaLine {
102 m = fastaLine
103 }
104 buf1 := out.NextWrite(m + 1)
105 copy(buf1, buf[off:])
106 buf1[m] = '\n'
107 if off += m; off >= len(alu) {
108 off -= len(alu)
109 }
110 n -= m
111 }
112 }
113
114 const (
115 fastaLookupSize = 4096
116 fastaLookupScale float64 = fastaLookupSize - 1
117 )
118
119 var fastaRand uint32 = 42
120
121 type fastaAcid struct {
122 sym byte
123 prob float64
124 cprob float64
125 next *fastaAcid
126 }
127
128 func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid {
129 var lookup [fastaLookupSize]*fastaAcid
130 var p float64
131 for i := range acid {
132 p += acid[i].prob
133 acid[i].cprob = p * fastaLookupScale
134 if i > 0 {
135 acid[i-1].next = &acid[i]
136 }
137 }
138 acid[len(acid)-1].cprob = 1.0 * fastaLookupScale
139
140 j := 0
141 for i := range lookup {
142 for acid[j].cprob < float64(i) {
143 j++
144 }
145 lookup[i] = &acid[j]
146 }
147
148 return &lookup
149 }
150
151 func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) {
152 const (
153 IM = 139968
154 IA = 3877
155 IC = 29573
156 )
157 lookup := fastaComputeLookup(acid)
158 for n > 0 {
159 m := n
160 if m > fastaLine {
161 m = fastaLine
162 }
163 buf := out.NextWrite(m + 1)
164 f := fastaLookupScale / IM
165 myrand := fastaRand
166 for i := 0; i < m; i++ {
167 myrand = (myrand*IA + IC) % IM
168 r := float64(int(myrand)) * f
169 a := lookup[int(r)]
170 for a.cprob < r {
171 a = a.next
172 }
173 buf[i] = a.sym
174 }
175 fastaRand = myrand
176 buf[m] = '\n'
177 n -= m
178 }
179 }
180
View as plain text