package evidence_test

import (
    "encoding/hex"
    "fmt"
    "sync"
    "testing"
    "time"

    "github.com/go-kit/kit/log/term"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/mock"
    "github.com/stretchr/testify/require"

    dbm "github.com/tendermint/tm-db"

    cfg "github.com/tendermint/tendermint/config"
    "github.com/tendermint/tendermint/crypto"
    "github.com/tendermint/tendermint/crypto/tmhash"
    "github.com/tendermint/tendermint/evidence"
    "github.com/tendermint/tendermint/evidence/mocks"
    "github.com/tendermint/tendermint/libs/log"
    "github.com/tendermint/tendermint/p2p"
    ep "github.com/tendermint/tendermint/proto/tendermint/evidence"
    tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
    sm "github.com/tendermint/tendermint/state"
    "github.com/tendermint/tendermint/types"
)

// evidenceLogger is a TestingLogger which uses a different
// color for each validator ("validator" key must exist).
func evidenceLogger() log.Logger {
    return log.TestingLoggerWithColorFn(func(keyvals ...interface{}) term.FgBgColor {
        for i := 0; i < len(keyvals)-1; i += 2 {
            if keyvals[i] == "validator" {
                return term.FgBgColor{Fg: term.Color(uint8(keyvals[i+1].(int) + 1))}
            }
        }
        return term.FgBgColor{}
    })
}

// connect N evidence reactors through N switches
func makeAndConnectReactorsAndPools(config *cfg.Config, stateStores []sm.Store) ([]*evidence.Reactor,
    []*evidence.Pool) {
    N := len(stateStores)

    reactors := make([]*evidence.Reactor, N)
    pools := make([]*evidence.Pool, N)
    logger := evidenceLogger()
    evidenceTime := time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC)

    for i := 0; i < N; i++ {
        evidenceDB := dbm.NewMemDB()
        blockStore := &mocks.BlockStore{}
        blockStore.On("LoadBlockMeta", mock.AnythingOfType("int64")).Return(
            &types.BlockMeta{Header: types.Header{Time: evidenceTime}},
        )
        pool, err := evidence.NewPool(evidenceDB, stateStores[i], blockStore)
        if err != nil {
            panic(err)
        }
        pools[i] = pool
        reactors[i] = evidence.NewReactor(pool)
        reactors[i].SetLogger(logger.With("validator", i))
    }

    p2p.MakeConnectedSwitches(config.P2P, N, func(i int, s *p2p.Switch) *p2p.Switch {
        s.AddReactor("EVIDENCE", reactors[i])
        return s
    }, p2p.Connect2Switches)

    return reactors, pools
}

// wait for all evidence on all reactors
func waitForEvidence(t *testing.T, evs types.EvidenceList, pools []*evidence.Pool) {
    // wait for the evidence in all evpools
    wg := new(sync.WaitGroup)
    for i := 0; i < len(pools); i++ {
        wg.Add(1)
        go _waitForEvidence(t, wg, evs, i, pools)
    }

    done := make(chan struct{})
    go func() {
        wg.Wait()
        close(done)
    }()

    timer := time.After(timeout)
    select {
    case <-timer:
        t.Fatal("Timed out waiting for evidence")
    case <-done:
    }
}

// wait for all evidence on a single evpool
func _waitForEvidence(
    t *testing.T,
    wg *sync.WaitGroup,
    evs types.EvidenceList,
    poolIdx int,
    pools []*evidence.Pool,
) {
    evpool := pools[poolIdx]
    var evList []types.Evidence
    currentPoolSize := 0
    for currentPoolSize != len(evs) {
        evList, _ = evpool.PendingEvidence(int64(len(evs) * 500)) // each piece of evidence should not be more than 500 bytes
        currentPoolSize = len(evList)
        time.Sleep(time.Millisecond * 100)
    }

    // put the reaped evidence in a map so we can quickly check we got everything
    evMap := make(map[string]types.Evidence)
    for _, e := range evList {
        evMap[string(e.Hash())] = e
    }
    for i, expectedEv := range evs {
        gotEv := evMap[string(expectedEv.Hash())]
        assert.Equal(t, expectedEv, gotEv,
            fmt.Sprintf("evidence at index %d on pool %d doesn't match: %v vs %v",
                i, poolIdx, expectedEv, gotEv))
    }

    wg.Done()
}
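
// sendEvidence adds n pieces of mock duplicate-vote evidence signed by val to
// evpool and returns the list so callers can check what was gossiped.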
func sendEvidence(t *testing.T, evpool *evidence.Pool, val types.PrivValidator, n int) types.EvidenceList {
    evList := make([]types.Evidence, n)
    for i := 0; i < n; i++ {
        ev := types.NewMockDuplicateVoteEvidenceWithValidator(int64(i+1),
            time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC), val, evidenceChainID)
        err := evpool.AddEvidence(ev)
        require.NoError(t, err)
        evList[i] = ev
    }
    return evList
}

var (
    numEvidence = 10
    timeout     = 120 * time.Second // ridiculously high because CircleCI is slow
)
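
// TestReactorBroadcastEvidence checks that evidence submitted to one reactor's
// pool is gossiped to the pools of all connected peers.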
func TestReactorBroadcastEvidence(t *testing.T) {
    config := cfg.TestConfig()
    N := 7

    // create statedb for everyone
    stateDBs := make([]sm.Store, N)
    val := types.NewMockPV()
    // we need validators saved for heights at least as high as we have evidence for
    height := int64(numEvidence) + 10
    for i := 0; i < N; i++ {
        stateDBs[i] = initializeValidatorState(val, height)
    }

    // make reactors from statedb
    reactors, pools := makeAndConnectReactorsAndPools(config, stateDBs)

    // set the peer height on each reactor
    for _, r := range reactors {
        for _, peer := range r.Switch.Peers().List() {
            ps := peerState{height}
            peer.Set(types.PeerStateKey, ps)
        }
    }

    // send a bunch of valid evidence to the first reactor's evpool
    // and wait for them all to be received in the others
    evList := sendEvidence(t, pools[0], val, numEvidence)
    waitForEvidence(t, evList, pools)
}
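
// peerState is a minimal stub for the value stored under types.PeerStateKey;
// the evidence reactor only reads the peer's height from it.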
type peerState struct {
    height int64
}

func (ps peerState) GetHeight() int64 {
    return ps.height
}
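
// TestReactorSelectiveBroadcast checks that a reactor only sends evidence from
// heights the receiving peer has already reached.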
func TestReactorSelectiveBroadcast(t *testing.T) {
    config := cfg.TestConfig()

    val := types.NewMockPV()
    height1 := int64(numEvidence) + 10
    height2 := int64(numEvidence) / 2

    // DB1 is ahead of DB2
    stateDB1 := initializeValidatorState(val, height1)
    stateDB2 := initializeValidatorState(val, height2)

    // make reactors from statedb
    reactors, pools := makeAndConnectReactorsAndPools(config, []sm.Store{stateDB1, stateDB2})

    // set the peer height on each reactor
    for _, r := range reactors {
        for _, peer := range r.Switch.Peers().List() {
            ps := peerState{height1}
            peer.Set(types.PeerStateKey, ps)
        }
    }

    // update the first reactor peer's height to be very small
    peer := reactors[0].Switch.Peers().List()[0]
    ps := peerState{height2}
    peer.Set(types.PeerStateKey, ps)

    // send a bunch of valid evidence to the first reactor's evpool
    evList := sendEvidence(t, pools[0], val, numEvidence)

    // only evidence from heights below the peer's height should make it through
    waitForEvidence(t, evList[:numEvidence/2-1], pools[1:2])

    // peers should still be connected
    peers := reactors[1].Switch.Peers().List()
    assert.Equal(t, 1, len(peers))
}
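
// exampleVote returns a deterministic vote so the evidence built from it has
// stable bytes for the encoding vectors below.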
func exampleVote(t byte) *types.Vote {
    var stamp, err = time.Parse(types.TimeFormat, "2017-12-25T03:00:01.234Z")
    if err != nil {
        panic(err)
    }

    return &types.Vote{
        Type:      tmproto.SignedMsgType(t),
        Height:    3,
        Round:     2,
        Timestamp: stamp,
        BlockID: types.BlockID{
            Hash: tmhash.Sum([]byte("blockID_hash")),
            PartSetHeader: types.PartSetHeader{
                Total: 1000000,
                Hash:  tmhash.Sum([]byte("blockID_part_set_header_hash")),
            },
        },
        ValidatorAddress: crypto.AddressHash([]byte("validator_address")),
        ValidatorIndex:   56789,
    }
}
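
// TestEvidenceVectors pins the protobuf wire encoding of evidence: the
// hard-coded hex strings catch unintended changes to the serialized format.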
// nolint:lll //ignore line length for tests
func TestEvidenceVectors(t *testing.T) {
    dupl := types.NewDuplicateVoteEvidence(exampleVote(1), exampleVote(2))

    testCases := []struct {
        testName     string
        evidenceList []types.Evidence
        expBytes     string
    }{
        {"DuplicateVoteEvidence", []types.Evidence{dupl}, "0af9010af6010a79080210031802224a0a208b01023386c371778ecb6368573e539afc3cc860ec3a2f614e54fe5652f4fc80122608c0843d122072db3d959635dff1bb567bedaa70573392c5159666a3f8caf11e413aac52207a2a0b08b1d381d20510809dca6f32146af1f4111082efb388211bc72c55bcd61e9ac3d538d5bb031279080110031802224a0a208b01023386c371778ecb6368573e539afc3cc860ec3a2f614e54fe5652f4fc80122608c0843d122072db3d959635dff1bb567bedaa70573392c5159666a3f8caf11e413aac52207a2a0b08b1d381d20510809dca6f32146af1f4111082efb388211bc72c55bcd61e9ac3d538d5bb03"},
    }

    for _, tc := range testCases {
        tc := tc

        evi := make([]*tmproto.Evidence, len(tc.evidenceList))
        for i := 0; i < len(tc.evidenceList); i++ {
            ev, err := types.EvidenceToProto(tc.evidenceList[i])
            require.NoError(t, err, tc.testName)
            evi[i] = ev
        }

        epl := ep.List{
            Evidence: evi,
        }

        bz, err := epl.Marshal()
        require.NoError(t, err, tc.testName)

        require.Equal(t, tc.expBytes, hex.EncodeToString(bz), tc.testName)
    }
}