You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

458 lines
13 KiB

blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. * remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
5 years ago
blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. * remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
5 years ago
blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. * remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
5 years ago
blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. * remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
5 years ago
8 years ago
7 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
7 years ago
7 years ago
10 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
8 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
add support for block pruning via ABCI Commit response (#4588) * Added BlockStore.DeleteBlock() * Added initial block pruner prototype * wip * Added BlockStore.PruneBlocks() * Added consensus setting for block pruning * Added BlockStore base * Error on replay if base does not have blocks * Handle missing blocks when sending VoteSetMaj23Message * Error message tweak * Properly update blockstore state * Error message fix again * blockchain: ignore peer missing blocks * Added FIXME * Added test for block replay with truncated history * Handle peer base in blockchain reactor * Improved replay error handling * Added tests for Store.PruneBlocks() * Fix non-RPC handling of truncated block history * Panic on missing block meta in needProofBlock() * Updated changelog * Handle truncated block history in RPC layer * Added info about earliest block in /status RPC * Reorder height and base in blockchain reactor messages * Updated changelog * Fix tests * Appease linter * Minor review fixes * Non-empty BlockStores should always have base > 0 * Update code to assume base > 0 invariant * Added blockstore tests for pruning to 0 * Make sure we don't prune below the current base * Added BlockStore.Size() * config: added retain_blocks recommendations * Update v1 blockchain reactor to handle blockstore base * Added state database pruning * Propagate errors on missing validator sets * Comment tweaks * Improved error message Co-Authored-By: Anton Kaliaev <anton.kalyaev@gmail.com> * use ABCI field ResponseCommit.retain_height instead of retain-blocks config option * remove State.RetainHeight, return value instead * fix minor issues * rename pruneHeights() to pruneBlocks() * noop to fix GitHub borkage Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
5 years ago
  1. package v0
  2. import (
  3. "fmt"
  4. "reflect"
  5. "time"
  6. bc "github.com/tendermint/tendermint/blockchain"
  7. "github.com/tendermint/tendermint/libs/log"
  8. "github.com/tendermint/tendermint/p2p"
  9. bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain"
  10. sm "github.com/tendermint/tendermint/state"
  11. "github.com/tendermint/tendermint/store"
  12. "github.com/tendermint/tendermint/types"
  13. )
  14. const (
  15. // BlockchainChannel is a channel for blocks and status updates (`BlockStore` height)
  16. BlockchainChannel = byte(0x40)
  17. trySyncIntervalMS = 10
  18. // stop syncing when last block's time is
  19. // within this much of the system time.
  20. // stopSyncingDurationMinutes = 10
  21. // ask for best height every 10s
  22. statusUpdateIntervalSeconds = 10
  23. // check if we should switch to consensus reactor
  24. switchToConsensusIntervalSeconds = 1
  25. // switch to consensus after this duration of inactivity
  26. syncTimeout = 60 * time.Second
  27. )
  28. type consensusReactor interface {
  29. // for when we switch from blockchain reactor and fast sync to
  30. // the consensus machine
  31. SwitchToConsensus(state sm.State, skipWAL bool)
  32. }
  33. type peerError struct {
  34. err error
  35. peerID p2p.ID
  36. }
  37. func (e peerError) Error() string {
  38. return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error())
  39. }
  40. // BlockchainReactor handles long-term catchup syncing.
  41. type BlockchainReactor struct {
  42. p2p.BaseReactor
  43. // immutable
  44. initialState sm.State
  45. blockExec *sm.BlockExecutor
  46. store *store.BlockStore
  47. pool *BlockPool
  48. fastSync bool
  49. requestsCh <-chan BlockRequest
  50. errorsCh <-chan peerError
  51. }
  52. // NewBlockchainReactor returns new reactor instance.
  53. func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore,
  54. fastSync bool) *BlockchainReactor {
  55. if state.LastBlockHeight != store.Height() {
  56. panic(fmt.Sprintf("state (%v) and store (%v) height mismatch", state.LastBlockHeight,
  57. store.Height()))
  58. }
  59. requestsCh := make(chan BlockRequest, maxTotalRequesters)
  60. const capacity = 1000 // must be bigger than peers count
  61. errorsCh := make(chan peerError, capacity) // so we don't block in #Receive#pool.AddBlock
  62. startHeight := store.Height() + 1
  63. if startHeight == 1 {
  64. startHeight = state.InitialHeight
  65. }
  66. pool := NewBlockPool(startHeight, requestsCh, errorsCh)
  67. bcR := &BlockchainReactor{
  68. initialState: state,
  69. blockExec: blockExec,
  70. store: store,
  71. pool: pool,
  72. fastSync: fastSync,
  73. requestsCh: requestsCh,
  74. errorsCh: errorsCh,
  75. }
  76. bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR)
  77. return bcR
  78. }
  79. // SetLogger implements service.Service by setting the logger on reactor and pool.
  80. func (bcR *BlockchainReactor) SetLogger(l log.Logger) {
  81. bcR.BaseService.Logger = l
  82. bcR.pool.Logger = l
  83. }
  84. // OnStart implements service.Service.
  85. func (bcR *BlockchainReactor) OnStart() error {
  86. if bcR.fastSync {
  87. err := bcR.pool.Start()
  88. if err != nil {
  89. return err
  90. }
  91. go bcR.poolRoutine(false)
  92. }
  93. return nil
  94. }
  95. // SwitchToFastSync is called by the state sync reactor when switching to fast sync.
  96. func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error {
  97. bcR.fastSync = true
  98. bcR.initialState = state
  99. bcR.pool.height = state.LastBlockHeight + 1
  100. err := bcR.pool.Start()
  101. if err != nil {
  102. return err
  103. }
  104. go bcR.poolRoutine(true)
  105. return nil
  106. }
  107. // OnStop implements service.Service.
  108. func (bcR *BlockchainReactor) OnStop() {
  109. if bcR.fastSync {
  110. if err := bcR.pool.Stop(); err != nil {
  111. bcR.Logger.Error("Error stopping pool", "err", err)
  112. }
  113. }
  114. }
  115. // GetChannels implements Reactor
  116. func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor {
  117. return []*p2p.ChannelDescriptor{
  118. {
  119. ID: BlockchainChannel,
  120. Priority: 5,
  121. SendQueueCapacity: 1000,
  122. RecvBufferCapacity: 50 * 4096,
  123. RecvMessageCapacity: bc.MaxMsgSize,
  124. },
  125. }
  126. }
  127. // AddPeer implements Reactor by sending our state to peer.
  128. func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) {
  129. msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{
  130. Base: bcR.store.Base(),
  131. Height: bcR.store.Height()})
  132. if err != nil {
  133. bcR.Logger.Error("could not convert msg to protobuf", "err", err)
  134. return
  135. }
  136. _ = peer.Send(BlockchainChannel, msgBytes)
  137. // it's OK if send fails. will try later in poolRoutine
  138. // peer is added to the pool once we receive the first
  139. // bcStatusResponseMessage from the peer and call pool.SetPeerRange
  140. }
  141. // RemovePeer implements Reactor by removing peer from the pool.
  142. func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) {
  143. bcR.pool.RemovePeer(peer.ID())
  144. }
  145. // respondToPeer loads a block and sends it to the requesting peer,
  146. // if we have it. Otherwise, we'll respond saying we don't have it.
  147. func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest,
  148. src p2p.Peer) (queued bool) {
  149. block := bcR.store.LoadBlock(msg.Height)
  150. if block != nil {
  151. bl, err := block.ToProto()
  152. if err != nil {
  153. bcR.Logger.Error("could not convert msg to protobuf", "err", err)
  154. return false
  155. }
  156. msgBytes, err := bc.EncodeMsg(&bcproto.BlockResponse{Block: bl})
  157. if err != nil {
  158. bcR.Logger.Error("could not marshal msg", "err", err)
  159. return false
  160. }
  161. return src.TrySend(BlockchainChannel, msgBytes)
  162. }
  163. bcR.Logger.Info("Peer asking for a block we don't have", "src", src, "height", msg.Height)
  164. msgBytes, err := bc.EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height})
  165. if err != nil {
  166. bcR.Logger.Error("could not convert msg to protobuf", "err", err)
  167. return false
  168. }
  169. return src.TrySend(BlockchainChannel, msgBytes)
  170. }
  171. // Receive implements Reactor by handling 4 types of messages (look below).
  172. // XXX: do not call any methods that can block or incur heavy processing.
  173. // https://github.com/tendermint/tendermint/issues/2888
  174. func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
  175. logger := bcR.Logger.With("src", src, "chId", chID)
  176. msg, err := bc.DecodeMsg(msgBytes)
  177. if err != nil {
  178. logger.Error("Error decoding message", "err", err)
  179. bcR.Switch.StopPeerForError(src, err)
  180. return
  181. }
  182. if err = bc.ValidateMsg(msg); err != nil {
  183. logger.Error("Peer sent us invalid msg", "msg", msg, "err", err)
  184. bcR.Switch.StopPeerForError(src, err)
  185. return
  186. }
  187. logger.Debug("Receive", "msg", msg)
  188. switch msg := msg.(type) {
  189. case *bcproto.BlockRequest:
  190. bcR.respondToPeer(msg, src)
  191. case *bcproto.BlockResponse:
  192. bi, err := types.BlockFromProto(msg.Block)
  193. if err != nil {
  194. logger.Error("Block content is invalid", "err", err)
  195. bcR.Switch.StopPeerForError(src, err)
  196. return
  197. }
  198. bcR.pool.AddBlock(src.ID(), bi, len(msgBytes))
  199. case *bcproto.StatusRequest:
  200. // Send peer our state.
  201. msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{
  202. Height: bcR.store.Height(),
  203. Base: bcR.store.Base(),
  204. })
  205. if err != nil {
  206. logger.Error("could not convert msg to protobut", "err", err)
  207. return
  208. }
  209. src.TrySend(BlockchainChannel, msgBytes)
  210. case *bcproto.StatusResponse:
  211. // Got a peer status. Unverified.
  212. bcR.pool.SetPeerRange(src.ID(), msg.Base, msg.Height)
  213. case *bcproto.NoBlockResponse:
  214. logger.Debug("Peer does not have requested block", "height", msg.Height)
  215. default:
  216. logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
  217. }
  218. }
  219. // Handle messages from the poolReactor telling the reactor what to do.
  220. // NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
  221. func (bcR *BlockchainReactor) poolRoutine(stateSynced bool) {
  222. var (
  223. trySyncTicker = time.NewTicker(trySyncIntervalMS * time.Millisecond)
  224. statusUpdateTicker = time.NewTicker(statusUpdateIntervalSeconds * time.Second)
  225. switchToConsensusTicker = time.NewTicker(switchToConsensusIntervalSeconds * time.Second)
  226. blocksSynced = uint64(0)
  227. chainID = bcR.initialState.ChainID
  228. state = bcR.initialState
  229. lastHundred = time.Now()
  230. lastRate = 0.0
  231. didProcessCh = make(chan struct{}, 1)
  232. )
  233. go func() {
  234. for {
  235. select {
  236. case <-bcR.Quit():
  237. return
  238. case <-bcR.pool.Quit():
  239. return
  240. case request := <-bcR.requestsCh:
  241. peer := bcR.Switch.Peers().Get(request.PeerID)
  242. if peer == nil {
  243. bcR.Logger.Debug("Can't send request: no peer", "peer_id", request.PeerID)
  244. continue
  245. }
  246. msgBytes, err := bc.EncodeMsg(&bcproto.BlockRequest{Height: request.Height})
  247. if err != nil {
  248. bcR.Logger.Error("could not convert BlockRequest to proto", "err", err)
  249. continue
  250. }
  251. queued := peer.TrySend(BlockchainChannel, msgBytes)
  252. if !queued {
  253. bcR.Logger.Debug("Send queue is full, drop block request", "peer", peer.ID(), "height", request.Height)
  254. }
  255. case err := <-bcR.errorsCh:
  256. peer := bcR.Switch.Peers().Get(err.peerID)
  257. if peer != nil {
  258. bcR.Switch.StopPeerForError(peer, err)
  259. }
  260. case <-statusUpdateTicker.C:
  261. // ask for status updates
  262. go bcR.BroadcastStatusRequest()
  263. }
  264. }
  265. }()
  266. FOR_LOOP:
  267. for {
  268. select {
  269. case <-switchToConsensusTicker.C:
  270. var (
  271. height, numPending, lenRequesters = bcR.pool.GetStatus()
  272. outbound, inbound, _ = bcR.Switch.NumPeers()
  273. lastAdvance = bcR.pool.LastAdvance()
  274. )
  275. bcR.Logger.Debug("Consensus ticker",
  276. "numPending", numPending,
  277. "total", lenRequesters)
  278. switch {
  279. case bcR.pool.IsCaughtUp():
  280. bcR.Logger.Info("Time to switch to consensus reactor!", "height", height)
  281. case time.Since(lastAdvance) > syncTimeout:
  282. bcR.Logger.Error(fmt.Sprintf("No progress since last advance: %v", lastAdvance))
  283. default:
  284. bcR.Logger.Info("Not caught up yet",
  285. "height", height, "max_peer_height", bcR.pool.MaxPeerHeight(),
  286. "num_peers", outbound+inbound,
  287. "timeout_in", syncTimeout-time.Since(lastAdvance))
  288. continue
  289. }
  290. if err := bcR.pool.Stop(); err != nil {
  291. bcR.Logger.Error("Error stopping pool", "err", err)
  292. }
  293. conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)
  294. if ok {
  295. conR.SwitchToConsensus(state, blocksSynced > 0 || stateSynced)
  296. }
  297. break FOR_LOOP
  298. case <-trySyncTicker.C: // chan time
  299. select {
  300. case didProcessCh <- struct{}{}:
  301. default:
  302. }
  303. case <-didProcessCh:
  304. // NOTE: It is a subtle mistake to process more than a single block
  305. // at a time (e.g. 10) here, because we only TrySend 1 request per
  306. // loop. The ratio mismatch can result in starving of blocks, a
  307. // sudden burst of requests and responses, and repeat.
  308. // Consequently, it is better to split these routines rather than
  309. // coupling them as it's written here. TODO uncouple from request
  310. // routine.
  311. // See if there are any blocks to sync.
  312. first, second := bcR.pool.PeekTwoBlocks()
  313. // bcR.Logger.Info("TrySync peeked", "first", first, "second", second)
  314. if first == nil || second == nil {
  315. // We need both to sync the first block.
  316. continue FOR_LOOP
  317. } else {
  318. // Try again quickly next loop.
  319. didProcessCh <- struct{}{}
  320. }
  321. var (
  322. firstParts = first.MakePartSet(types.BlockPartSizeBytes)
  323. firstPartSetHeader = firstParts.Header()
  324. firstID = types.BlockID{Hash: first.Hash(), PartSetHeader: firstPartSetHeader}
  325. )
  326. // Finally, verify the first block using the second's commit
  327. // NOTE: we can probably make this more efficient, but note that calling
  328. // first.Hash() doesn't verify the tx contents, so MakePartSet() is
  329. // currently necessary.
  330. err := state.Validators.VerifyCommitLight(chainID, firstID, first.Height, second.LastCommit)
  331. if err != nil {
  332. err = fmt.Errorf("invalid last commit: %w", err)
  333. bcR.Logger.Error(err.Error(),
  334. "last_commit", second.LastCommit, "block_id", firstID, "height", first.Height)
  335. peerID := bcR.pool.RedoRequest(first.Height)
  336. peer := bcR.Switch.Peers().Get(peerID)
  337. if peer != nil {
  338. // NOTE: we've already removed the peer's request, but we still need
  339. // to clean up the rest.
  340. bcR.Switch.StopPeerForError(peer, err)
  341. }
  342. peerID2 := bcR.pool.RedoRequest(second.Height)
  343. if peerID2 != peerID {
  344. if peer2 := bcR.Switch.Peers().Get(peerID2); peer2 != nil {
  345. bcR.Switch.StopPeerForError(peer2, err)
  346. }
  347. }
  348. continue FOR_LOOP
  349. } else {
  350. bcR.pool.PopRequest()
  351. // TODO: batch saves so we dont persist to disk every block
  352. bcR.store.SaveBlock(first, firstParts, second.LastCommit)
  353. // TODO: same thing for app - but we would need a way to get the hash
  354. // without persisting the state.
  355. var err error
  356. state, _, err = bcR.blockExec.ApplyBlock(state, firstID, first)
  357. if err != nil {
  358. // TODO This is bad, are we zombie?
  359. panic(fmt.Sprintf("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))
  360. }
  361. blocksSynced++
  362. if blocksSynced%100 == 0 {
  363. lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
  364. bcR.Logger.Info("Fast Sync Rate",
  365. "height", bcR.pool.height, "max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)
  366. lastHundred = time.Now()
  367. }
  368. }
  369. continue FOR_LOOP
  370. case <-bcR.Quit():
  371. break FOR_LOOP
  372. }
  373. }
  374. }
  375. // BroadcastStatusRequest broadcasts `BlockStore` base and height.
  376. func (bcR *BlockchainReactor) BroadcastStatusRequest() {
  377. bm, err := bc.EncodeMsg(&bcproto.StatusRequest{})
  378. if err != nil {
  379. bcR.Logger.Error("could not convert StatusRequest to proto", "err", err)
  380. return
  381. }
  382. // We don't care about whenever broadcast is successful or not.
  383. _ = bcR.Switch.Broadcast(BlockchainChannel, bm)
  384. }