// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/reflectdata"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
)

// A ZeroRegion records parts of an object which are known to be zero.
// A ZeroRegion only applies to a single memory state.
// Each bit in mask is set if the corresponding pointer-sized word of
// the base object is known to be zero.
// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
// is known to be zero.
type ZeroRegion struct {
	base *Value
	mask uint64
}

// needwb reports whether we need write barrier for store op v.
// v must be Store/Move/Zero.
// zeroes provides known zero information (keyed by ID of memory-type values).
func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
	t, ok := v.Aux.(*types.Type)
	if !ok {
		v.Fatalf("store aux is not a type: %s", v.LongString())
	}
	if !t.HasPointers() {
		return false
	}
	if IsStackAddr(v.Args[0]) {
		return false // write on stack doesn't need write barrier
	}
	if v.Op == OpMove && IsReadOnlyGlobalAddr(v.Args[1]) {
		if mem, ok := IsNewObject(v.Args[0]); ok && mem == v.MemoryArg() {
			// Copying data from readonly memory into a fresh object doesn't need a write barrier.
			return false
		}
	}
	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) {
		// Storing pointers to non-heap locations into zeroed memory doesn't
		// need a write barrier.
		ptr := v.Args[0]
		var off int64
		size := v.Aux.(*types.Type).Size()
		for ptr.Op == OpOffPtr {
			off += ptr.AuxInt
			ptr = ptr.Args[0]
		}
		ptrSize := v.Block.Func.Config.PtrSize
		if off%ptrSize != 0 || size%ptrSize != 0 {
			v.Fatalf("unaligned pointer write")
		}
		if off < 0 || off+size > 64*ptrSize {
			// write goes off end of tracked offsets
			return true
		}
		z := zeroes[v.MemoryArg().ID]
		if ptr != z.base {
			return true
		}
		for i := off; i < off+size; i += ptrSize {
			if z.mask>>uint(i/ptrSize)&1 == 0 {
				return true // not known to be zero
			}
		}
		// All written locations are known to be zero - write barrier not needed.
		return false
	}
	return true
}

// writebarrier pass inserts write barriers for store ops (Store, Move, Zero)
// when necessary (the condition above). It rewrites store ops to branches
// and runtime calls, like
//
//	if writeBarrier.enabled {
//		gcWriteBarrier(ptr, val) // Not a regular Go call
//	} else {
//		*ptr = val
//	}
//
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
func writebarrier(f *Func) {
	if !f.fe.UseWriteBarrier() {
		return
	}

	var sb, sp, wbaddr, const0 *Value
	var typedmemmove, typedmemclr, gcWriteBarrier *obj.LSym
	var stores, after []*Value
	var sset *sparseSet
	var storeNumber []int32

	zeroes := f.computeZeroMap()
	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
		// first, identify all the stores that need to insert a write barrier.
		// mark them with WB ops temporarily. record presence of WB ops.
		nWBops := 0 // count of temporarily created WB ops remaining to be rewritten in the current block
		for _, v := range b.Values {
			switch v.Op {
			case OpStore, OpMove, OpZero:
				if needwb(v, zeroes) {
					switch v.Op {
					case OpStore:
						v.Op = OpStoreWB
					case OpMove:
						v.Op = OpMoveWB
					case OpZero:
						v.Op = OpZeroWB
					}
					nWBops++
				}
			}
		}
		if nWBops == 0 {
			continue
		}

		if wbaddr == nil {
			// lazily initialize global values for write barrier test and calls
			// find SB and SP values in entry block
			initpos := f.Entry.Pos
			sp, sb = f.spSb()
			wbsym := f.fe.Syslook("writeBarrier")
			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb)
			gcWriteBarrier = f.fe.Syslook("gcWriteBarrier")
			typedmemmove = f.fe.Syslook("typedmemmove")
			typedmemclr = f.fe.Syslook("typedmemclr")
			const0 = f.ConstInt32(f.Config.Types.UInt32, 0)

			// allocate auxiliary data structures for computing store order
			sset = f.newSparseSet(f.NumValues())
			defer f.retSparseSet(sset)
			storeNumber = make([]int32, f.NumValues())
		}

		// order values in store order
		b.Values = storeOrder(b.Values, sset, storeNumber)

		firstSplit := true
	again:
		// find the start and end of the last contiguous WB store sequence.
		// a branch will be inserted there. values after it will be moved
		// to a new block.
		var last *Value
		var start, end int
		values := b.Values
	FindSeq:
		for i := len(values) - 1; i >= 0; i-- {
			w := values[i]
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				start = i
				if last == nil {
					last = w
					end = i + 1
				}
			case OpVarDef, OpVarLive, OpVarKill:
				continue
			default:
				if last == nil {
					continue
				}
				break FindSeq
			}
		}
		stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
		after = append(after[:0], b.Values[end:]...)
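		// At this point stores holds the trailing run of WB stores (plus any
		// VarDef/VarLive/VarKill ops interleaved with them) and after holds
		// the values that follow the run. Both are copies, so truncating
		// b.Values below and re-emitting the run under an explicit
		// writeBarrier.enabled test is safe.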
		b.Values = b.Values[:start]

		// find the memory before the WB stores
		mem := stores[0].MemoryArg()
		pos := stores[0].Pos
		bThen := f.NewBlock(BlockPlain)
		bElse := f.NewBlock(BlockPlain)
		bEnd := f.NewBlock(b.Kind)
		bThen.Pos = pos
		bElse.Pos = pos
		bEnd.Pos = b.Pos
		b.Pos = pos

		// set up control flow for end block
		bEnd.CopyControls(b)
		bEnd.Likely = b.Likely
		for _, e := range b.Succs {
			bEnd.Succs = append(bEnd.Succs, e)
			e.b.Preds[e.i].b = bEnd
		}

		// set up control flow for write barrier test
		// load word, test word, avoiding partial register write from load byte.
		cfgtypes := &f.Config.Types
		flag := b.NewValue2(pos, OpLoad, cfgtypes.UInt32, wbaddr, mem)
		flag = b.NewValue2(pos, OpNeq32, cfgtypes.Bool, flag, const0)
		b.Kind = BlockIf
		b.SetControl(flag)
		b.Likely = BranchUnlikely
		b.Succs = b.Succs[:0]
		b.AddEdgeTo(bThen)
		b.AddEdgeTo(bElse)
		// TODO: For OpStoreWB and the buffered write barrier,
		// we could move the write out of the write barrier,
		// which would lead to fewer branches. We could do
		// something similar to OpZeroWB, since the runtime
		// could provide just the barrier half and then we
		// could unconditionally do an OpZero (which could
		// also generate better zeroing code). OpMoveWB is
		// trickier and would require changing how
		// cgoCheckMemmove works.
		bThen.AddEdgeTo(bEnd)
		bElse.AddEdgeTo(bEnd)

		// for each write barrier store, append write barrier version to bThen
		// and simple store version to bElse
		memThen := mem
		memElse := mem

		// If the source of a MoveWB is volatile (will be clobbered by a
		// function call), we need to copy it to a temporary location, as
		// marshaling the args of typedmemmove might clobber the value we're
		// trying to move.
		// Look for volatile source, copy it to temporary before we emit any
		// call.
		// It is unlikely to have more than one of them. Just do a linear
		// search instead of using a map.
		type volatileCopy struct {
			src *Value // address of original volatile value
			tmp *Value // address of temporary we've copied the volatile value into
		}
		var volatiles []volatileCopy
	copyLoop:
		for _, w := range stores {
			if w.Op == OpMoveWB {
				val := w.Args[1]
				if isVolatile(val) {
					for _, c := range volatiles {
						if val == c.src {
							continue copyLoop // already copied
						}
					}

					t := val.Type.Elem()
					tmp := f.fe.Auto(w.Pos, t)
					memThen = bThen.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, memThen)
					tmpaddr := bThen.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, memThen)
					siz := t.Size()
					memThen = bThen.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, memThen)
					memThen.Aux = t
					volatiles = append(volatiles, volatileCopy{val, tmpaddr})
				}
			}
		}

		for _, w := range stores {
			ptr := w.Args[0]
			pos := w.Pos

			var fn *obj.LSym
			var typ *obj.LSym
			var val *Value
			switch w.Op {
			case OpStoreWB:
				val = w.Args[1]
				nWBops--
			case OpMoveWB:
				fn = typedmemmove
				val = w.Args[1]
				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
				nWBops--
			case OpZeroWB:
				fn = typedmemclr
				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
				nWBops--
			case OpVarDef, OpVarLive, OpVarKill:
			}

			// then block: emit write barrier call
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				if w.Op == OpStoreWB {
					memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen)
				} else {
					srcval := val
					if w.Op == OpMoveWB && isVolatile(srcval) {
						for _, c := range volatiles {
							if srcval == c.src {
								srcval = c.tmp
								break
							}
						}
					}
					memThen = wbcall(pos, bThen, fn, typ, ptr, srcval, memThen, sp, sb)
				}
				// Note that we set up a writebarrier function call.
				f.fe.SetWBPos(pos)
			case OpVarDef, OpVarLive, OpVarKill:
				memThen = bThen.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memThen)
			}

			// else block: normal store
			switch w.Op {
			case OpStoreWB:
				memElse = bElse.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, memElse)
			case OpMoveWB:
				memElse = bElse.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, ptr, val, memElse)
				memElse.Aux = w.Aux
			case OpZeroWB:
				memElse = bElse.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, ptr, memElse)
				memElse.Aux = w.Aux
			case OpVarDef, OpVarLive, OpVarKill:
				memElse = bElse.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memElse)
			}
		}

		// mark volatile temps dead
		for _, c := range volatiles {
			tmpNode := c.tmp.Aux
			memThen = bThen.NewValue1A(memThen.Pos, OpVarKill, types.TypeMem, tmpNode, memThen)
		}

		// merge memory
		// Splice memory Phi into the last memory of the original sequence,
		// which may be used in subsequent blocks. Other memories in the
		// sequence must be dead after this block since there can be only
		// one memory live.
		bEnd.Values = append(bEnd.Values, last)
		last.Block = bEnd
		last.reset(OpPhi)
		last.Pos = last.Pos.WithNotStmt()
		last.Type = types.TypeMem
		last.AddArg(memThen)
		last.AddArg(memElse)
		for _, w := range stores {
			if w != last {
				w.resetArgs()
			}
		}
		for _, w := range stores {
			if w != last {
				f.freeValue(w)
			}
		}

		// put values after the store sequence into the end block
		bEnd.Values = append(bEnd.Values, after...)
		for _, w := range after {
			w.Block = bEnd
		}

		// Preemption is unsafe between loading the write
		// barrier-enabled flag and performing the write
		// because that would allow a GC phase transition,
		// which would invalidate the flag. Remember the
		// conditional block so liveness analysis can disable
		// safe-points. This is somewhat subtle because we're
		// splitting b bottom-up.
		if firstSplit {
			// Add b itself.
			b.Func.WBLoads = append(b.Func.WBLoads, b)
			firstSplit = false
		} else {
			// We've already split b, so we just pushed a
			// write barrier test into bEnd.
			b.Func.WBLoads = append(b.Func.WBLoads, bEnd)
		}

		// if we have more stores in this block, do this block again
		if nWBops > 0 {
			goto again
		}
	}
}

// computeZeroMap returns a map from an ID of a memory value to
// a set of locations that are known to be zeroed at that memory value.
func (f *Func) computeZeroMap() map[ID]ZeroRegion {
	ptrSize := f.Config.PtrSize
	// Keep track of which parts of memory are known to be zero.
	// This helps with removing write barriers for various initialization patterns.
	// This analysis is conservative. We only keep track, for each memory state, of
	// which of the first 64 words of a single object are known to be zero.
	zeroes := map[ID]ZeroRegion{}
	// Find new objects.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if mem, ok := IsNewObject(v); ok {
				nptr := v.Type.Elem().Size() / ptrSize
				if nptr > 64 {
					nptr = 64
				}
				zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
			}
		}
	}
	// Find stores to those new objects and propagate the zero information
	// to the memory states those stores produce, until it converges.
	for {
		changed := false
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				if v.Op != OpStore {
					continue
				}
				z, ok := zeroes[v.MemoryArg().ID]
				if !ok {
					continue
				}
				ptr := v.Args[0]
				var off int64
				size := v.Aux.(*types.Type).Size()
				for ptr.Op == OpOffPtr {
					off += ptr.AuxInt
					ptr = ptr.Args[0]
				}
				if ptr != z.base {
					// Different base object - the store may or may not alias
					// the object we're tracking, so don't record any zero
					// information for the new memory state.
					continue
				}
				// Round to cover any partially written pointer slot.
				// Pointer writes are required to be properly aligned anyway.
				min := off &^ (ptrSize - 1)
				max := (off + size + ptrSize - 1) &^ (ptrSize - 1)
				// Clip to the 64 words that we track.
				if min < 0 {
					min = 0
				}
				if max > 64*ptrSize {
					max = 64 * ptrSize
				}
				// Clear bits for parts that we are writing (and hence
				// will no longer necessarily be zero).
				for i := min; i < max; i += ptrSize {
					bit := i / ptrSize
					z.mask &^= 1 << uint(bit)
				}
				if z.mask == 0 {
					// No more known zeros - don't bother keeping.
					continue
				}
				// Save updated known zero contents for new store.
				if zeroes[v.ID] != z {
					zeroes[v.ID] = z
					changed = true
				}
			}
		}
		if !changed {
			break
		}
	}
	if f.pass.debug > 0 {
		fmt.Printf("func %s\n", f.Name)
		for mem, z := range zeroes {
			fmt.Printf("  memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
		}
	}
	return zeroes
}
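
// Worked example of the zero map (illustrative only, assuming a hypothetical
// type T occupying 4 pointer-sized words): for p := new(T), the memory state
// produced by the newobject call gets ZeroRegion{base: p, mask: 0b1111}.
// A later OpStore to p's second word yields a memory state whose region is
// ZeroRegion{base: p, mask: 0b1101}, so needwb can still omit the barrier for
// stores of global addresses into words 0, 2, and 3 at that memory state.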

// wbcall emits write barrier runtime call in b, returns memory.
func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value) *Value {
	config := b.Func.Config

	var wbargs []*Value
	// TODO (register args) this is a bit of a hack.
	inRegs := b.Func.ABIDefault == b.Func.ABI1 && len(config.intParamRegs) >= 3

	// put arguments on stack
	off := config.ctxt.FixedFrameSize()

	var argTypes []*types.Type
	if typ != nil { // for typedmemmove
		taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
		argTypes = append(argTypes, b.Func.Config.Types.Uintptr)
		off = round(off, taddr.Type.Alignment())
		if inRegs {
			wbargs = append(wbargs, taddr)
		} else {
			arg := b.NewValue1I(pos, OpOffPtr, taddr.Type.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, taddr, mem)
		}
		off += taddr.Type.Size()
	}

	argTypes = append(argTypes, ptr.Type)
	off = round(off, ptr.Type.Alignment())
	if inRegs {
		wbargs = append(wbargs, ptr)
	} else {
		arg := b.NewValue1I(pos, OpOffPtr, ptr.Type.PtrTo(), off, sp)
		mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, ptr, mem)
	}
	off += ptr.Type.Size()

	if val != nil {
		argTypes = append(argTypes, val.Type)
		off = round(off, val.Type.Alignment())
		if inRegs {
			wbargs = append(wbargs, val)
		} else {
			arg := b.NewValue1I(pos, OpOffPtr, val.Type.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, val.Type, arg, val, mem)
		}
		off += val.Type.Size()
	}
	off = round(off, config.PtrSize)
	wbargs = append(wbargs, mem)

	// issue call
	call := b.NewValue0A(pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(fn, b.Func.ABIDefault.ABIAnalyzeTypes(nil, argTypes, nil)))
	call.AddArgs(wbargs...)
	call.AuxInt = off - config.ctxt.FixedFrameSize()
	return b.NewValue1I(pos, OpSelectN, types.TypeMem, 0, call)
}

// round to a multiple of r, r is a power of 2
func round(o int64, r int64) int64 {
	return (o + r - 1) &^ (r - 1)
}

// IsStackAddr reports whether v is known to be an address of a stack slot.
func IsStackAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr, OpGetCallerSP:
		return true
	}
	return false
}

// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
func IsGlobalAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	if v.Op == OpAddr && v.Args[0].Op == OpSB {
		return true // address of a global
	}
	if v.Op == OpConstNil {
		return true
	}
	if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
		return true // loading from a read-only global - the resulting address can't be a heap address.
	}
	return false
}

// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
func IsReadOnlyGlobalAddr(v *Value) bool {
	if v.Op == OpConstNil {
		// Nil pointers are read only. See issue 33438.
		return true
	}
	if v.Op == OpAddr && v.Aux.(*obj.LSym).Type == objabi.SRODATA {
		return true
	}
	return false
}

// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object,
// if so, also returns the memory state mem at which v is zero.
func IsNewObject(v *Value) (mem *Value, ok bool) {
	f := v.Block.Func
	c := f.Config
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Op != OpSelectN || v.AuxInt != 0 {
			return nil, false
		}
		// Find the memory
		for _, w := range v.Block.Values {
			if w.Op == OpSelectN && w.AuxInt == 1 && w.Args[0] == v.Args[0] {
				mem = w
				break
			}
		}
		if mem == nil {
			return nil, false
		}
	} else {
		if v.Op != OpLoad {
			return nil, false
		}
		mem = v.MemoryArg()
		if mem.Op != OpSelectN {
			return nil, false
		}
		if mem.Type != types.TypeMem {
			return nil, false
		} // assume it is the right selection if true
	}
	call := mem.Args[0]
	if call.Op != OpStaticCall {
		return nil, false
	}
	if !isSameCall(call.Aux, "runtime.newobject") {
		return nil, false
	}
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Args[0] == call {
			return mem, true
		}
		return nil, false
	}
	if v.Args[0].Op != OpOffPtr {
		return nil, false
	}
	if v.Args[0].Args[0].Op != OpSP {
		return nil, false
	}
	if v.Args[0].AuxInt != c.ctxt.FixedFrameSize()+c.RegSize { // offset of return value
		return nil, false
	}
	return mem, true
}

// IsSanitizerSafeAddr reports whether v is known to be an address
// that doesn't need instrumentation.
func IsSanitizerSafeAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr:
		// Stack addresses are always safe.
		return true
	case OpITab, OpStringPtr, OpGetClosurePtr:
		// Itabs, string data, and closure fields are
		// read-only once initialized.
		return true
	case OpAddr:
		return v.Aux.(*obj.LSym).Type == objabi.SRODATA || v.Aux.(*obj.LSym).Type == objabi.SLIBFUZZER_EXTRA_COUNTER
	}
	return false
}

// isVolatile reports whether v is a pointer to argument region on stack which
// will be clobbered by a function call.
func isVolatile(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy || v.Op == OpSelectNAddr {
		v = v.Args[0]
	}
	return v.Op == OpSP
}