You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

431 lines
13 KiB

blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. 
* remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
6 years ago
blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. 
* remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
6 years ago
blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. 
* remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
6 years ago
blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. 
* remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
6 years ago
8 years ago
7 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
7 years ago
7 years ago
8 years ago
10 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
8 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
  1. package v0
  2. import (
  3. "fmt"
  4. "reflect"
  5. "time"
  6. bc "github.com/tendermint/tendermint/blockchain"
  7. "github.com/tendermint/tendermint/libs/log"
  8. "github.com/tendermint/tendermint/p2p"
  9. bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain"
  10. sm "github.com/tendermint/tendermint/state"
  11. "github.com/tendermint/tendermint/store"
  12. "github.com/tendermint/tendermint/types"
  13. )
  14. const (
  15. // BlockchainChannel is a channel for blocks and status updates (`BlockStore` height)
  16. BlockchainChannel = byte(0x40)
  17. trySyncIntervalMS = 10
  18. // stop syncing when last block's time is
  19. // within this much of the system time.
  20. // stopSyncingDurationMinutes = 10
  21. // ask for best height every 10s
  22. statusUpdateIntervalSeconds = 10
  23. // check if we should switch to consensus reactor
  24. switchToConsensusIntervalSeconds = 1
  25. )
// consensusReactor is the narrow view of the consensus reactor that this
// package depends on.
type consensusReactor interface {
	// for when we switch from blockchain reactor and fast sync to
	// the consensus machine
	SwitchToConsensus(state sm.State, skipWAL bool)
}
// peerError pairs an error with the ID of the peer it relates to.
// It implements the error interface (see Error).
type peerError struct {
	err    error  // underlying error; Error() calls err.Error(), so it must be non-nil
	peerID p2p.ID // ID of the peer the error is attributed to
}
  35. func (e peerError) Error() string {
  36. return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error())
  37. }
// BlockchainReactor handles long-term catchup syncing.
type BlockchainReactor struct {
	p2p.BaseReactor

	// immutable
	// NOTE(review): SwitchToFastSync reassigns initialState, so this only
	// holds when that method is not used — confirm the intended contract.
	initialState sm.State

	blockExec *sm.BlockExecutor // block executor handed to NewBlockchainReactor
	store     *store.BlockStore // block store; source of Base()/Height() and of blocks served to peers
	pool      *BlockPool        // block pool created in NewBlockchainReactor
	fastSync  bool              // whether the pool is started/stopped with the reactor; set to true by SwitchToFastSync

	// Receive-only ends of the channels created in NewBlockchainReactor and
	// handed to NewBlockPool.
	requestsCh <-chan BlockRequest
	errorsCh   <-chan peerError
}
  50. // NewBlockchainReactor returns new reactor instance.
  51. func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore,
  52. fastSync bool) *BlockchainReactor {
  53. if state.LastBlockHeight != store.Height() {
  54. panic(fmt.Sprintf("state (%v) and store (%v) height mismatch", state.LastBlockHeight,
  55. store.Height()))
  56. }
  57. requestsCh := make(chan BlockRequest, maxTotalRequesters)
  58. const capacity = 1000 // must be bigger than peers count
  59. errorsCh := make(chan peerError, capacity) // so we don't block in #Receive#pool.AddBlock
  60. startHeight := store.Height() + 1
  61. if startHeight == 1 {
  62. startHeight = state.InitialHeight
  63. }
  64. pool := NewBlockPool(startHeight, requestsCh, errorsCh)
  65. bcR := &BlockchainReactor{
  66. initialState: state,
  67. blockExec: blockExec,
  68. store: store,
  69. pool: pool,
  70. fastSync: fastSync,
  71. requestsCh: requestsCh,
  72. errorsCh: errorsCh,
  73. }
  74. bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR)
  75. return bcR
  76. }
  77. // SetLogger implements service.Service by setting the logger on reactor and pool.
  78. func (bcR *BlockchainReactor) SetLogger(l log.Logger) {
  79. bcR.BaseService.Logger = l
  80. bcR.pool.Logger = l
  81. }
  82. // OnStart implements service.Service.
  83. func (bcR *BlockchainReactor) OnStart() error {
  84. if bcR.fastSync {
  85. err := bcR.pool.Start()
  86. if err != nil {
  87. return err
  88. }
  89. go bcR.poolRoutine(false)
  90. }
  91. return nil
  92. }
  93. // SwitchToFastSync is called by the state sync reactor when switching to fast sync.
  94. func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error {
  95. bcR.fastSync = true
  96. bcR.initialState = state
  97. bcR.pool.height = state.LastBlockHeight + 1
  98. err := bcR.pool.Start()
  99. if err != nil {
  100. return err
  101. }
  102. go bcR.poolRoutine(true)
  103. return nil
  104. }
  105. // OnStop implements service.Service.
  106. func (bcR *BlockchainReactor) OnStop() {
  107. if bcR.fastSync {
  108. if err := bcR.pool.Stop(); err != nil {
  109. bcR.Logger.Error("Error stopping pool", "err", err)
  110. }
  111. }
  112. }
  113. // GetChannels implements Reactor
  114. func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor {
  115. return []*p2p.ChannelDescriptor{
  116. {
  117. ID: BlockchainChannel,
  118. Priority: 5,
  119. SendQueueCapacity: 1000,
  120. RecvBufferCapacity: 50 * 4096,
  121. RecvMessageCapacity: bc.MaxMsgSize,
  122. },
  123. }
  124. }
  125. // AddPeer implements Reactor by sending our state to peer.
  126. func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) {
  127. msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{
  128. Base: bcR.store.Base(),
  129. Height: bcR.store.Height()})
  130. if err != nil {
  131. bcR.Logger.Error("could not convert msg to protobuf", "err", err)
  132. return
  133. }
  134. peer.Send(BlockchainChannel, msgBytes)
  135. // it's OK if send fails. will try later in poolRoutine
  136. // peer is added to the pool once we receive the first
  137. // bcStatusResponseMessage from the peer and call pool.SetPeerRange
  138. }
  139. // RemovePeer implements Reactor by removing peer from the pool.
  140. func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) {
  141. bcR.pool.RemovePeer(peer.ID())
  142. }
  143. // respondToPeer loads a block and sends it to the requesting peer,
  144. // if we have it. Otherwise, we'll respond saying we don't have it.
  145. func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest,
  146. src p2p.Peer) (queued bool) {
  147. block := bcR.store.LoadBlock(msg.Height)
  148. if block != nil {
  149. bl, err := block.ToProto()
  150. if err != nil {
  151. bcR.Logger.Error("could not convert msg to protobuf", "err", err)
  152. return false
  153. }
  154. msgBytes, err := bc.EncodeMsg(&bcproto.BlockResponse{Block: bl})
  155. if err != nil {
  156. bcR.Logger.Error("could not marshal msg", "err", err)
  157. return false
  158. }
  159. return src.TrySend(BlockchainChannel, msgBytes)
  160. }
  161. bcR.Logger.Info("Peer asking for a block we don't have", "src", src, "height", msg.Height)
  162. msgBytes, err := bc.EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height})
  163. if err != nil {
  164. bcR.Logger.Error("could not convert msg to protobuf", "err", err)
  165. return false
  166. }
  167. return src.TrySend(BlockchainChannel, msgBytes)
  168. }
  169. // Receive implements Reactor by handling 4 types of messages (look below).
  170. func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
  171. msg, err := bc.DecodeMsg(msgBytes)
  172. if err != nil {
  173. bcR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err)
  174. bcR.Switch.StopPeerForError(src, err)
  175. return
  176. }
  177. if err = bc.ValidateMsg(msg); err != nil {
  178. bcR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err)
  179. bcR.Switch.StopPeerForError(src, err)
  180. return
  181. }
  182. bcR.Logger.Debug("Receive", "src", src, "chID", chID, "msg", msg)
  183. switch msg := msg.(type) {
  184. case *bcproto.BlockRequest:
  185. bcR.respondToPeer(msg, src)
  186. case *bcproto.BlockResponse:
  187. bi, err := types.BlockFromProto(msg.Block)
  188. if err != nil {
  189. bcR.Logger.Error("Block content is invalid", "err", err)
  190. return
  191. }
  192. bcR.pool.AddBlock(src.ID(), bi, len(msgBytes))
  193. case *bcproto.StatusRequest:
  194. // Send peer our state.
  195. msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{
  196. Height: bcR.store.Height(),
  197. Base: bcR.store.Base(),
  198. })
  199. if err != nil {
  200. bcR.Logger.Error("could not convert msg to protobut", "err", err)
  201. return
  202. }
  203. src.TrySend(BlockchainChannel, msgBytes)
  204. case *bcproto.StatusResponse:
  205. // Got a peer status. Unverified.
  206. bcR.pool.SetPeerRange(src.ID(), msg.Base, msg.Height)
  207. case *bcproto.NoBlockResponse:
  208. bcR.Logger.Debug("Peer does not have requested block", "peer", src, "height", msg.Height)
  209. default:
  210. bcR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
  211. }
  212. }
  213. // Handle messages from the poolReactor telling the reactor what to do.
  214. // NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
  215. func (bcR *BlockchainReactor) poolRoutine(stateSynced bool) {
  216. trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond)
  217. defer trySyncTicker.Stop()
  218. statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second)
  219. defer statusUpdateTicker.Stop()
  220. switchToConsensusTicker := time.NewTicker(switchToConsensusIntervalSeconds * time.Second)
  221. defer switchToConsensusTicker.Stop()
  222. blocksSynced := uint64(0)
  223. chainID := bcR.initialState.ChainID
  224. state := bcR.initialState
  225. lastHundred := time.Now()
  226. lastRate := 0.0
  227. didProcessCh := make(chan struct{}, 1)
  228. go func() {
  229. for {
  230. select {
  231. case <-bcR.Quit():
  232. return
  233. case <-bcR.pool.Quit():
  234. return
  235. case request := <-bcR.requestsCh:
  236. peer := bcR.Switch.Peers().Get(request.PeerID)
  237. if peer == nil {
  238. continue
  239. }
  240. msgBytes, err := bc.EncodeMsg(&bcproto.BlockRequest{Height: request.Height})
  241. if err != nil {
  242. bcR.Logger.Error("could not convert msg to proto", "err", err)
  243. continue
  244. }
  245. queued := peer.TrySend(BlockchainChannel, msgBytes)
  246. if !queued {
  247. bcR.Logger.Debug("Send queue is full, drop block request", "peer", peer.ID(), "height", request.Height)
  248. }
  249. case err := <-bcR.errorsCh:
  250. peer := bcR.Switch.Peers().Get(err.peerID)
  251. if peer != nil {
  252. bcR.Switch.StopPeerForError(peer, err)
  253. }
  254. case <-statusUpdateTicker.C:
  255. // ask for status updates
  256. go bcR.BroadcastStatusRequest() // nolint: errcheck
  257. }
  258. }
  259. }()
  260. FOR_LOOP:
  261. for {
  262. select {
  263. case <-switchToConsensusTicker.C:
  264. height, numPending, lenRequesters := bcR.pool.GetStatus()
  265. outbound, inbound, _ := bcR.Switch.NumPeers()
  266. bcR.Logger.Debug("Consensus ticker", "numPending", numPending, "total", lenRequesters,
  267. "outbound", outbound, "inbound", inbound)
  268. if bcR.pool.IsCaughtUp() {
  269. bcR.Logger.Info("Time to switch to consensus reactor!", "height", height)
  270. if err := bcR.pool.Stop(); err != nil {
  271. bcR.Logger.Error("Error stopping pool", "err", err)
  272. }
  273. conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)
  274. if ok {
  275. conR.SwitchToConsensus(state, blocksSynced > 0 || stateSynced)
  276. }
  277. // else {
  278. // should only happen during testing
  279. // }
  280. break FOR_LOOP
  281. }
  282. case <-trySyncTicker.C: // chan time
  283. select {
  284. case didProcessCh <- struct{}{}:
  285. default:
  286. }
  287. case <-didProcessCh:
  288. // NOTE: It is a subtle mistake to process more than a single block
  289. // at a time (e.g. 10) here, because we only TrySend 1 request per
  290. // loop. The ratio mismatch can result in starving of blocks, a
  291. // sudden burst of requests and responses, and repeat.
  292. // Consequently, it is better to split these routines rather than
  293. // coupling them as it's written here. TODO uncouple from request
  294. // routine.
  295. // See if there are any blocks to sync.
  296. first, second := bcR.pool.PeekTwoBlocks()
  297. // bcR.Logger.Info("TrySync peeked", "first", first, "second", second)
  298. if first == nil || second == nil {
  299. // We need both to sync the first block.
  300. continue FOR_LOOP
  301. } else {
  302. // Try again quickly next loop.
  303. didProcessCh <- struct{}{}
  304. }
  305. firstParts := first.MakePartSet(types.BlockPartSizeBytes)
  306. firstPartSetHeader := firstParts.Header()
  307. firstID := types.BlockID{Hash: first.Hash(), PartSetHeader: firstPartSetHeader}
  308. // Finally, verify the first block using the second's commit
  309. // NOTE: we can probably make this more efficient, but note that calling
  310. // first.Hash() doesn't verify the tx contents, so MakePartSet() is
  311. // currently necessary.
  312. err := state.Validators.VerifyCommitLight(
  313. chainID, firstID, first.Height, second.LastCommit)
  314. if err != nil {
  315. bcR.Logger.Error("Error in validation", "err", err)
  316. peerID := bcR.pool.RedoRequest(first.Height)
  317. peer := bcR.Switch.Peers().Get(peerID)
  318. if peer != nil {
  319. // NOTE: we've already removed the peer's request, but we
  320. // still need to clean up the rest.
  321. bcR.Switch.StopPeerForError(peer, fmt.Errorf("blockchainReactor validation error: %v", err))
  322. }
  323. peerID2 := bcR.pool.RedoRequest(second.Height)
  324. peer2 := bcR.Switch.Peers().Get(peerID2)
  325. if peer2 != nil && peer2 != peer {
  326. // NOTE: we've already removed the peer's request, but we
  327. // still need to clean up the rest.
  328. bcR.Switch.StopPeerForError(peer2, fmt.Errorf("blockchainReactor validation error: %v", err))
  329. }
  330. continue FOR_LOOP
  331. } else {
  332. bcR.pool.PopRequest()
  333. // TODO: batch saves so we dont persist to disk every block
  334. bcR.store.SaveBlock(first, firstParts, second.LastCommit)
  335. // TODO: same thing for app - but we would need a way to
  336. // get the hash without persisting the state
  337. var err error
  338. state, _, err = bcR.blockExec.ApplyBlock(state, firstID, first)
  339. if err != nil {
  340. // TODO This is bad, are we zombie?
  341. panic(fmt.Sprintf("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))
  342. }
  343. blocksSynced++
  344. if blocksSynced%100 == 0 {
  345. lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
  346. bcR.Logger.Info("Fast Sync Rate", "height", bcR.pool.height,
  347. "max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)
  348. lastHundred = time.Now()
  349. }
  350. }
  351. continue FOR_LOOP
  352. case <-bcR.Quit():
  353. break FOR_LOOP
  354. }
  355. }
  356. }
  357. // BroadcastStatusRequest broadcasts `BlockStore` base and height.
  358. func (bcR *BlockchainReactor) BroadcastStatusRequest() error {
  359. bm, err := bc.EncodeMsg(&bcproto.StatusRequest{})
  360. if err != nil {
  361. bcR.Logger.Error("could not convert msg to proto", "err", err)
  362. return fmt.Errorf("could not convert msg to proto: %w", err)
  363. }
  364. bcR.Switch.Broadcast(BlockchainChannel, bm)
  365. return nil
  366. }