blockchain: Reorg reactor (#3561)

* goroutines in blockchain reactor
* Added reference to the goroutine diagram
* Initial commit
* cleanup
* Undo testing_logger change, committed by mistake
* Fix the test loggers
* pulled some FSM code into pool.go
* added pool tests
* changes to the design: added block requests under peer; moved the request trigger into the reactor's poolRoutine, now triggered by a ticker; in general, moved everything required for making block requests smarter into the poolRoutine; added a simple map of heights to keep track of what will need to be requested next; added a few more tests
* send errors to the FSM on a different channel than blocks: send errors (RemovePeer) from the switch on a different channel than the one receiving blocks; renamed channels; added more pool tests
* more pool tests
* lint errors
* more tests
* more tests
* switch fast sync to the new implementation
* fixed data race in tests
* cleanup
* finished FSM tests
* address golangci comments :)
* address golangci comments :)
* Added timeout on next block needed to advance
* updating docs and cleanup
* fix issue in test from previous cleanup
* cleanup
* Added termination scenarios, tests and more cleanup
* small fixes to the ADR, comments and cleanup
* Fix bug in sendRequest(): if we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be black-holed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks to the math for requesting blocks ahead of the current processing height, so that eventually no more blocks are requested until the already received ones are consumed.
* remove bpPeer's didTimeout field
* Use distinct error codes for peer timeouts and FSM timeouts
* Don't allow peers to update with a lower height
* review comments from Ethan and Zarko
* some cleanup, renaming, comments
* Move block execution to a separate goroutine
* Remove the pool's numPending
* review comments
* fix lint, remove old blockchain reactor and duplicates in FSM tests
* small reorg around peer after review comments
* add the reactor spec
* verify each block only once
* review comments
* change to int for the max number of pending requests
* cleanup and godoc
* Add configuration flag for fast sync version
* golangci fixes
* fix config template
* move both reactor versions under blockchain
* cleanup, golint, renaming stuff
* updated documentation, fixed more golint warnings
* integrate with behavior package
* sync with master
* gofmt
* add changelog_pending entry
* move to improvements
* suggestion to changelog entry
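The sendRequest() fix called out in the commit message is easier to see in code. The following is a minimal illustrative sketch, not the reactor's actual implementation: examplePool, examplePeer, and sendRequests are hypothetical names, the real code looks peers up through the p2p switch, and package/import boilerplate is omitted.

// Sketch of the bug: before the fix, a request aimed at a peer that had
// already left the switch fell through the lookup, was recorded as sent,
// and was never delivered or retried ("black-holed").
type examplePeer struct{ pendingRequests int }

func (p *examplePeer) RequestSent(height int64) { p.pendingRequests++ }

type examplePool struct {
    peers map[string]*examplePeer // peers currently known to the switch
}

func (pool *examplePool) sendRequests(assignments map[int64]string) {
    for height, peerID := range assignments {
        peer, ok := pool.peers[peerID]
        if !ok {
            // The fix: skip peers that are no longer in the switch, so the
            // request stays unassigned and can be retried elsewhere.
            continue
        }
        peer.RequestSent(height)
    }
}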
package v1

import (
    "sync"
    "testing"
    "time"

    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"

    cmn "github.com/tendermint/tendermint/libs/common"
    "github.com/tendermint/tendermint/libs/log"
    "github.com/tendermint/tendermint/p2p"
    "github.com/tendermint/tendermint/types"
)

func TestPeerMonitor(t *testing.T) {
    peer := NewBpPeer(
        p2p.ID(cmn.RandStr(12)), 10,
        func(err error, _ p2p.ID) {},
        nil)
    peer.SetLogger(log.TestingLogger())
    peer.startMonitor()
    assert.NotNil(t, peer.recvMonitor)
    peer.stopMonitor()
    assert.Nil(t, peer.recvMonitor)
}

func TestPeerResetBlockResponseTimer(t *testing.T) {
    var (
        numErrFuncCalls int        // number of calls to the errFunc
        lastErr         error      // last generated error
        peerTestMtx     sync.Mutex // the variables above are also modified from the timer handler goroutine
    )
    params := &BpPeerParams{timeout: 2 * time.Millisecond}

    peer := NewBpPeer(
        p2p.ID(cmn.RandStr(12)), 10,
        func(err error, _ p2p.ID) {
            peerTestMtx.Lock()
            defer peerTestMtx.Unlock()
            lastErr = err
            numErrFuncCalls++
        },
        params)

    peer.SetLogger(log.TestingLogger())
    checkByStoppingPeerTimer(t, peer, false)

    // initial reset call with peer having a nil timer
    peer.resetBlockResponseTimer()
    assert.NotNil(t, peer.blockResponseTimer)

    // make sure the timer is running and stop it
    checkByStoppingPeerTimer(t, peer, true)

    // reset with running timer
    peer.resetBlockResponseTimer()
    time.Sleep(time.Millisecond)
    peer.resetBlockResponseTimer()
    assert.NotNil(t, peer.blockResponseTimer)

    // let the timer expire and ...
    time.Sleep(3 * time.Millisecond)
    // ... check the timer is not running
    checkByStoppingPeerTimer(t, peer, false)

    peerTestMtx.Lock()
    // ... check errNoPeerResponse has been sent
    assert.Equal(t, 1, numErrFuncCalls)
    assert.Equal(t, lastErr, errNoPeerResponse)
    peerTestMtx.Unlock()
}

func TestPeerRequestSent(t *testing.T) {
    params := &BpPeerParams{timeout: 2 * time.Millisecond}

    peer := NewBpPeer(
        p2p.ID(cmn.RandStr(12)), 10,
        func(err error, _ p2p.ID) {},
        params)
    peer.SetLogger(log.TestingLogger())

    peer.RequestSent(1)
    assert.NotNil(t, peer.recvMonitor)
    assert.NotNil(t, peer.blockResponseTimer)
    assert.Equal(t, 1, peer.NumPendingBlockRequests)

    peer.RequestSent(1)
    assert.NotNil(t, peer.recvMonitor)
    assert.NotNil(t, peer.blockResponseTimer)
    assert.Equal(t, 2, peer.NumPendingBlockRequests)
}

func TestPeerGetAndRemoveBlock(t *testing.T) {
    peer := NewBpPeer(
        p2p.ID(cmn.RandStr(12)), 100,
        func(err error, _ p2p.ID) {},
        nil)

    // Change the peer height
    peer.Height = int64(10)
    assert.Equal(t, int64(10), peer.Height)

    // request some blocks and receive a few of them
    for i := 1; i <= 10; i++ {
        peer.RequestSent(int64(i))
        if i > 5 {
            // only receive blocks 1..5
            continue
        }
        _ = peer.AddBlock(makeSmallBlock(i), 10)
    }

    tests := []struct {
        name         string
        height       int64
        wantErr      error
        blockPresent bool
    }{
        {"no request", 100, errMissingBlock, false},
        {"no block", 6, errMissingBlock, false},
        {"block 1 present", 1, nil, true},
        {"block max present", 5, nil, true},
    }

    for _, tt := range tests {
        tt := tt
        t.Run(tt.name, func(t *testing.T) {
            // try to get the block
            b, err := peer.BlockAtHeight(tt.height)
            assert.Equal(t, tt.wantErr, err)
            assert.Equal(t, tt.blockPresent, b != nil)

            // remove the block
            peer.RemoveBlock(tt.height)
            _, err = peer.BlockAtHeight(tt.height)
            assert.Equal(t, errMissingBlock, err)
        })
    }
}

func TestPeerAddBlock(t *testing.T) {
    peer := NewBpPeer(
        p2p.ID(cmn.RandStr(12)), 100,
        func(err error, _ p2p.ID) {},
        nil)

    // request some blocks, receive one
    for i := 1; i <= 10; i++ {
        peer.RequestSent(int64(i))
        if i == 5 {
            // receive block 5
            _ = peer.AddBlock(makeSmallBlock(i), 10)
        }
    }

    tests := []struct {
        name         string
        height       int64
        wantErr      error
        blockPresent bool
    }{
        {"no request", 50, errMissingBlock, false},
        {"duplicate block", 5, errDuplicateBlock, true},
        {"block 1 successfully received", 1, nil, true},
        {"block max successfully received", 10, nil, true},
    }

    for _, tt := range tests {
        tt := tt
        t.Run(tt.name, func(t *testing.T) {
            // try to add the block
            err := peer.AddBlock(makeSmallBlock(int(tt.height)), 10)
            assert.Equal(t, tt.wantErr, err)

            _, err = peer.BlockAtHeight(tt.height)
            assert.Equal(t, tt.blockPresent, err == nil)
        })
    }
}

func TestPeerOnErrFuncCalledDueToExpiration(t *testing.T) {
    params := &BpPeerParams{timeout: 2 * time.Millisecond}
    var (
        numErrFuncCalls int        // number of calls to the onErr function
        lastErr         error      // last generated error
        peerTestMtx     sync.Mutex // the variables above are also modified from the timer handler goroutine
    )

    peer := NewBpPeer(
        p2p.ID(cmn.RandStr(12)), 10,
        func(err error, _ p2p.ID) {
            peerTestMtx.Lock()
            defer peerTestMtx.Unlock()
            lastErr = err
            numErrFuncCalls++
        },
        params)
    peer.SetLogger(log.TestingLogger())

    peer.RequestSent(1)
    time.Sleep(4 * time.Millisecond)
    // the timer should have expired by now; check that the onErr function was called
    peerTestMtx.Lock()
    assert.Equal(t, 1, numErrFuncCalls)
    assert.Equal(t, errNoPeerResponse, lastErr)
    peerTestMtx.Unlock()
}

func TestPeerCheckRate(t *testing.T) {
    params := &BpPeerParams{
        timeout:     time.Second,
        minRecvRate: int64(100), // 100 bytes/sec exponential moving average
    }
    peer := NewBpPeer(
        p2p.ID(cmn.RandStr(12)), 10,
        func(err error, _ p2p.ID) {},
        params)
    peer.SetLogger(log.TestingLogger())

    require.Nil(t, peer.CheckRate())

    for i := 0; i < 40; i++ {
        peer.RequestSent(int64(i))
    }

    // the monitor starts with a higher rEMA (~ 2*minRecvRate); wait for it to go down
    time.Sleep(900 * time.Millisecond)

    // normal peer - send a bit more than 100 bytes/sec (> 10 bytes/100 msec), check the peer is not considered slow
    for i := 0; i < 10; i++ {
        _ = peer.AddBlock(makeSmallBlock(i), 11)
        time.Sleep(100 * time.Millisecond)
        require.Nil(t, peer.CheckRate())
    }

    // slow peer - send a bit less than 10 bytes/100 msec
    for i := 10; i < 20; i++ {
        _ = peer.AddBlock(makeSmallBlock(i), 9)
        time.Sleep(100 * time.Millisecond)
    }
    // check the peer is considered slow
    assert.Equal(t, errSlowPeer, peer.CheckRate())
}

func TestPeerCleanup(t *testing.T) {
    params := &BpPeerParams{timeout: 2 * time.Millisecond}

    peer := NewBpPeer(
        p2p.ID(cmn.RandStr(12)), 10,
        func(err error, _ p2p.ID) {},
        params)
    peer.SetLogger(log.TestingLogger())

    assert.Nil(t, peer.blockResponseTimer)
    peer.RequestSent(1)
    assert.NotNil(t, peer.blockResponseTimer)

    peer.Cleanup()
    checkByStoppingPeerTimer(t, peer, false)
}

// Check whether the peer timer is running (a running timer can be successfully stopped).
// Note: stops the timer.
func checkByStoppingPeerTimer(t *testing.T, peer *BpPeer, running bool) {
    assert.NotPanics(t, func() {
        stopped := peer.stopBlockResponseTimer()
        if running {
            assert.True(t, stopped)
        } else {
            assert.False(t, stopped)
        }
    })
}

func makeSmallBlock(height int) *types.Block {
    return types.MakeBlock(int64(height), []types.Tx{types.Tx("foo")}, nil, nil)
}