You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

617 lines
17 KiB

cleanup: Reduce and normalize import path aliasing. (#6975) The code in the Tendermint repository makes heavy use of import aliasing. This is made necessary by our extensive reuse of common base package names, and by repetition of similar names across different subdirectories. Unfortunately we have not been very consistent about which packages we alias in various circumstances, and the aliases we use vary. In the spirit of the advice in the style guide and https://github.com/golang/go/wiki/CodeReviewComments#imports, this change makes an effort to clean up and normalize import aliasing. This change makes no API or behavioral changes. It is a pure cleanup intended to help make the code more readable to developers (including myself) trying to understand what is being imported where. Only unexported names have been modified, and the changes were generated and applied mechanically with gofmt -r and comby, respecting the lexical and syntactic rules of Go. Even so, I did not fix every inconsistency. Where the changes would be too disruptive, I left it alone. The principles I followed in this cleanup are: - Remove aliases that restate the package name. - Remove aliases where the base package name is unambiguous. - Move overly-terse abbreviations from the import to the usage site. - Fix lexical issues (remove underscores, remove capitalization). - Fix import groupings to more closely match the style guide. - Group blank (side-effecting) imports and ensure they are commented. - Add aliases to multiple imports with the same base package name.
3 years ago
cleanup: Reduce and normalize import path aliasing. (#6975) The code in the Tendermint repository makes heavy use of import aliasing. This is made necessary by our extensive reuse of common base package names, and by repetition of similar names across different subdirectories. Unfortunately we have not been very consistent about which packages we alias in various circumstances, and the aliases we use vary. In the spirit of the advice in the style guide and https://github.com/golang/go/wiki/CodeReviewComments#imports, his change makes an effort to clean up and normalize import aliasing. This change makes no API or behavioral changes. It is a pure cleanup intended o help make the code more readable to developers (including myself) trying to understand what is being imported where. Only unexported names have been modified, and the changes were generated and applied mechanically with gofmt -r and comby, respecting the lexical and syntactic rules of Go. Even so, I did not fix every inconsistency. Where the changes would be too disruptive, I left it alone. The principles I followed in this cleanup are: - Remove aliases that restate the package name. - Remove aliases where the base package name is unambiguous. - Move overly-terse abbreviations from the import to the usage site. - Fix lexical issues (remove underscores, remove capitalization). - Fix import groupings to more closely match the style guide. - Group blank (side-effecting) imports and ensure they are commented. - Add aliases to multiple imports with the same base package name.
3 years ago
cleanup: Reduce and normalize import path aliasing. (#6975) The code in the Tendermint repository makes heavy use of import aliasing. This is made necessary by our extensive reuse of common base package names, and by repetition of similar names across different subdirectories. Unfortunately we have not been very consistent about which packages we alias in various circumstances, and the aliases we use vary. In the spirit of the advice in the style guide and https://github.com/golang/go/wiki/CodeReviewComments#imports, his change makes an effort to clean up and normalize import aliasing. This change makes no API or behavioral changes. It is a pure cleanup intended o help make the code more readable to developers (including myself) trying to understand what is being imported where. Only unexported names have been modified, and the changes were generated and applied mechanically with gofmt -r and comby, respecting the lexical and syntactic rules of Go. Even so, I did not fix every inconsistency. Where the changes would be too disruptive, I left it alone. The principles I followed in this cleanup are: - Remove aliases that restate the package name. - Remove aliases where the base package name is unambiguous. - Move overly-terse abbreviations from the import to the usage site. - Fix lexical issues (remove underscores, remove capitalization). - Fix import groupings to more closely match the style guide. - Group blank (side-effecting) imports and ensure they are commented. - Add aliases to multiple imports with the same base package name.
3 years ago
cleanup: Reduce and normalize import path aliasing. (#6975) The code in the Tendermint repository makes heavy use of import aliasing. This is made necessary by our extensive reuse of common base package names, and by repetition of similar names across different subdirectories. Unfortunately we have not been very consistent about which packages we alias in various circumstances, and the aliases we use vary. In the spirit of the advice in the style guide and https://github.com/golang/go/wiki/CodeReviewComments#imports, his change makes an effort to clean up and normalize import aliasing. This change makes no API or behavioral changes. It is a pure cleanup intended o help make the code more readable to developers (including myself) trying to understand what is being imported where. Only unexported names have been modified, and the changes were generated and applied mechanically with gofmt -r and comby, respecting the lexical and syntactic rules of Go. Even so, I did not fix every inconsistency. Where the changes would be too disruptive, I left it alone. The principles I followed in this cleanup are: - Remove aliases that restate the package name. - Remove aliases where the base package name is unambiguous. - Move overly-terse abbreviations from the import to the usage site. - Fix lexical issues (remove underscores, remove capitalization). - Fix import groupings to more closely match the style guide. - Group blank (side-effecting) imports and ensure they are commented. - Add aliases to multiple imports with the same base package name.
3 years ago
cleanup: Reduce and normalize import path aliasing. (#6975) The code in the Tendermint repository makes heavy use of import aliasing. This is made necessary by our extensive reuse of common base package names, and by repetition of similar names across different subdirectories. Unfortunately we have not been very consistent about which packages we alias in various circumstances, and the aliases we use vary. In the spirit of the advice in the style guide and https://github.com/golang/go/wiki/CodeReviewComments#imports, his change makes an effort to clean up and normalize import aliasing. This change makes no API or behavioral changes. It is a pure cleanup intended o help make the code more readable to developers (including myself) trying to understand what is being imported where. Only unexported names have been modified, and the changes were generated and applied mechanically with gofmt -r and comby, respecting the lexical and syntactic rules of Go. Even so, I did not fix every inconsistency. Where the changes would be too disruptive, I left it alone. The principles I followed in this cleanup are: - Remove aliases that restate the package name. - Remove aliases where the base package name is unambiguous. - Move overly-terse abbreviations from the import to the usage site. - Fix lexical issues (remove underscores, remove capitalization). - Fix import groupings to more closely match the style guide. - Group blank (side-effecting) imports and ensure they are commented. - Add aliases to multiple imports with the same base package name.
3 years ago
cleanup: Reduce and normalize import path aliasing. (#6975) The code in the Tendermint repository makes heavy use of import aliasing. This is made necessary by our extensive reuse of common base package names, and by repetition of similar names across different subdirectories. Unfortunately we have not been very consistent about which packages we alias in various circumstances, and the aliases we use vary. In the spirit of the advice in the style guide and https://github.com/golang/go/wiki/CodeReviewComments#imports, his change makes an effort to clean up and normalize import aliasing. This change makes no API or behavioral changes. It is a pure cleanup intended o help make the code more readable to developers (including myself) trying to understand what is being imported where. Only unexported names have been modified, and the changes were generated and applied mechanically with gofmt -r and comby, respecting the lexical and syntactic rules of Go. Even so, I did not fix every inconsistency. Where the changes would be too disruptive, I left it alone. The principles I followed in this cleanup are: - Remove aliases that restate the package name. - Remove aliases where the base package name is unambiguous. - Move overly-terse abbreviations from the import to the usage site. - Fix lexical issues (remove underscores, remove capitalization). - Fix import groupings to more closely match the style guide. - Group blank (side-effecting) imports and ensure they are commented. - Add aliases to multiple imports with the same base package name.
3 years ago
blocksync: fix shutdown deadlock issue (#7030) When shutting down blocksync, it is observed that the process can hang completely. A dump of running goroutines reveals that this is due to goroutines not listening on the correct shutdown signal. Namely, the `poolRoutine` goroutine does not wait on `pool.Quit`. The `poolRoutine` does not receive any other shutdown signal during `OnStop` because it must stop before the `r.closeCh` is closed. Currently the `poolRoutine` listens in the `closeCh` which will not close until the `poolRoutine` stops and calls `poolWG.Done()`. This change also puts the `requestRoutine()` in the `OnStart` method to make it more visible since it does not rely on anything that is spawned in the `poolRoutine`. ``` goroutine 183 [semacquire]: sync.runtime_Semacquire(0xc0000d3bd8) runtime/sema.go:56 +0x45 sync.(*WaitGroup).Wait(0xc0000d3bd0) sync/waitgroup.go:130 +0x65 github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStop(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:193 +0x47 github.com/tendermint/tendermint/libs/service.(*BaseService).Stop(0xc0000d3a00, 0x0, 0x0) github.com/tendermint/tendermint/libs/service/service.go:171 +0x323 github.com/tendermint/tendermint/node.(*nodeImpl).OnStop(0xc00052c000) github.com/tendermint/tendermint/node/node.go:758 +0xc62 github.com/tendermint/tendermint/libs/service.(*BaseService).Stop(0xc00052c000, 0x0, 0x0) github.com/tendermint/tendermint/libs/service/service.go:171 +0x323 github.com/tendermint/tendermint/cmd/tendermint/commands.NewRunNodeCmd.func1.1() github.com/tendermint/tendermint/cmd/tendermint/commands/run_node.go:143 +0x62 github.com/tendermint/tendermint/libs/os.TrapSignal.func1(0xc000df6d20, 0x7f04a68da900, 0xc0004a8930, 0xc0005a72d8) github.com/tendermint/tendermint/libs/os/os.go:26 +0x102 created by github.com/tendermint/tendermint/libs/os.TrapSignal github.com/tendermint/tendermint/libs/os/os.go:22 +0xe6 goroutine 161 [select]: 
github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).poolRoutine(0xc0000d3a00, 0x0) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:464 +0x2b3 created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:174 +0xf1 goroutine 162 [select]: github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).processBlockSyncCh(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:310 +0x151 created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:177 +0x54 goroutine 163 [select]: github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).processPeerUpdates(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:363 +0x12b created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:178 +0x76 ```
3 years ago
blocksync: fix shutdown deadlock issue (#7030) When shutting down blocksync, it is observed that the process can hang completely. A dump of running goroutines reveals that this is due to goroutines not listening on the correct shutdown signal. Namely, the `poolRoutine` goroutine does not wait on `pool.Quit`. The `poolRoutine` does not receive any other shutdown signal during `OnStop` becuase it must stop before the `r.closeCh` is closed. Currently the `poolRoutine` listens in the `closeCh` which will not close until the `poolRoutine` stops and calls `poolWG.Done()`. This change also puts the `requestRoutine()` in the `OnStart` method to make it more visible since it does not rely on anything that is spawned in the `poolRoutine`. ``` goroutine 183 [semacquire]: sync.runtime_Semacquire(0xc0000d3bd8) runtime/sema.go:56 +0x45 sync.(*WaitGroup).Wait(0xc0000d3bd0) sync/waitgroup.go:130 +0x65 github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStop(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:193 +0x47 github.com/tendermint/tendermint/libs/service.(*BaseService).Stop(0xc0000d3a00, 0x0, 0x0) github.com/tendermint/tendermint/libs/service/service.go:171 +0x323 github.com/tendermint/tendermint/node.(*nodeImpl).OnStop(0xc00052c000) github.com/tendermint/tendermint/node/node.go:758 +0xc62 github.com/tendermint/tendermint/libs/service.(*BaseService).Stop(0xc00052c000, 0x0, 0x0) github.com/tendermint/tendermint/libs/service/service.go:171 +0x323 github.com/tendermint/tendermint/cmd/tendermint/commands.NewRunNodeCmd.func1.1() github.com/tendermint/tendermint/cmd/tendermint/commands/run_node.go:143 +0x62 github.com/tendermint/tendermint/libs/os.TrapSignal.func1(0xc000df6d20, 0x7f04a68da900, 0xc0004a8930, 0xc0005a72d8) github.com/tendermint/tendermint/libs/os/os.go:26 +0x102 created by github.com/tendermint/tendermint/libs/os.TrapSignal github.com/tendermint/tendermint/libs/os/os.go:22 +0xe6 goroutine 161 [select]: 
github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).poolRoutine(0xc0000d3a00, 0x0) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:464 +0x2b3 created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:174 +0xf1 goroutine 162 [select]: github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).processBlockSyncCh(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:310 +0x151 created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:177 +0x54 goroutine 163 [select]: github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).processPeerUpdates(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:363 +0x12b created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:178 +0x76 ```
3 years ago
blocksync: fix shutdown deadlock issue (#7030) When shutting down blocksync, it is observed that the process can hang completely. A dump of running goroutines reveals that this is due to goroutines not listening on the correct shutdown signal. Namely, the `poolRoutine` goroutine does not wait on `pool.Quit`. The `poolRoutine` does not receive any other shutdown signal during `OnStop` becuase it must stop before the `r.closeCh` is closed. Currently the `poolRoutine` listens in the `closeCh` which will not close until the `poolRoutine` stops and calls `poolWG.Done()`. This change also puts the `requestRoutine()` in the `OnStart` method to make it more visible since it does not rely on anything that is spawned in the `poolRoutine`. ``` goroutine 183 [semacquire]: sync.runtime_Semacquire(0xc0000d3bd8) runtime/sema.go:56 +0x45 sync.(*WaitGroup).Wait(0xc0000d3bd0) sync/waitgroup.go:130 +0x65 github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStop(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:193 +0x47 github.com/tendermint/tendermint/libs/service.(*BaseService).Stop(0xc0000d3a00, 0x0, 0x0) github.com/tendermint/tendermint/libs/service/service.go:171 +0x323 github.com/tendermint/tendermint/node.(*nodeImpl).OnStop(0xc00052c000) github.com/tendermint/tendermint/node/node.go:758 +0xc62 github.com/tendermint/tendermint/libs/service.(*BaseService).Stop(0xc00052c000, 0x0, 0x0) github.com/tendermint/tendermint/libs/service/service.go:171 +0x323 github.com/tendermint/tendermint/cmd/tendermint/commands.NewRunNodeCmd.func1.1() github.com/tendermint/tendermint/cmd/tendermint/commands/run_node.go:143 +0x62 github.com/tendermint/tendermint/libs/os.TrapSignal.func1(0xc000df6d20, 0x7f04a68da900, 0xc0004a8930, 0xc0005a72d8) github.com/tendermint/tendermint/libs/os/os.go:26 +0x102 created by github.com/tendermint/tendermint/libs/os.TrapSignal github.com/tendermint/tendermint/libs/os/os.go:22 +0xe6 goroutine 161 [select]: 
github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).poolRoutine(0xc0000d3a00, 0x0) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:464 +0x2b3 created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:174 +0xf1 goroutine 162 [select]: github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).processBlockSyncCh(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:310 +0x151 created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:177 +0x54 goroutine 163 [select]: github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).processPeerUpdates(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:363 +0x12b created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:178 +0x76 ```
3 years ago
blocksync: fix shutdown deadlock issue (#7030) When shutting down blocksync, it is observed that the process can hang completely. A dump of running goroutines reveals that this is due to goroutines not listening on the correct shutdown signal. Namely, the `poolRoutine` goroutine does not wait on `pool.Quit`. The `poolRoutine` does not receive any other shutdown signal during `OnStop` becuase it must stop before the `r.closeCh` is closed. Currently the `poolRoutine` listens in the `closeCh` which will not close until the `poolRoutine` stops and calls `poolWG.Done()`. This change also puts the `requestRoutine()` in the `OnStart` method to make it more visible since it does not rely on anything that is spawned in the `poolRoutine`. ``` goroutine 183 [semacquire]: sync.runtime_Semacquire(0xc0000d3bd8) runtime/sema.go:56 +0x45 sync.(*WaitGroup).Wait(0xc0000d3bd0) sync/waitgroup.go:130 +0x65 github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStop(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:193 +0x47 github.com/tendermint/tendermint/libs/service.(*BaseService).Stop(0xc0000d3a00, 0x0, 0x0) github.com/tendermint/tendermint/libs/service/service.go:171 +0x323 github.com/tendermint/tendermint/node.(*nodeImpl).OnStop(0xc00052c000) github.com/tendermint/tendermint/node/node.go:758 +0xc62 github.com/tendermint/tendermint/libs/service.(*BaseService).Stop(0xc00052c000, 0x0, 0x0) github.com/tendermint/tendermint/libs/service/service.go:171 +0x323 github.com/tendermint/tendermint/cmd/tendermint/commands.NewRunNodeCmd.func1.1() github.com/tendermint/tendermint/cmd/tendermint/commands/run_node.go:143 +0x62 github.com/tendermint/tendermint/libs/os.TrapSignal.func1(0xc000df6d20, 0x7f04a68da900, 0xc0004a8930, 0xc0005a72d8) github.com/tendermint/tendermint/libs/os/os.go:26 +0x102 created by github.com/tendermint/tendermint/libs/os.TrapSignal github.com/tendermint/tendermint/libs/os/os.go:22 +0xe6 goroutine 161 [select]: 
github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).poolRoutine(0xc0000d3a00, 0x0) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:464 +0x2b3 created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:174 +0xf1 goroutine 162 [select]: github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).processBlockSyncCh(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:310 +0x151 created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:177 +0x54 goroutine 163 [select]: github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).processPeerUpdates(0xc0000d3a00) github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:363 +0x12b created by github.com/tendermint/tendermint/internal/blocksync/v0.(*Reactor).OnStart github.com/tendermint/tendermint/internal/blocksync/v0/reactor.go:178 +0x76 ```
3 years ago
  1. package blocksync
  2. import (
  3. "context"
  4. "fmt"
  5. "runtime/debug"
  6. "sync"
  7. "time"
  8. "github.com/tendermint/tendermint/internal/consensus"
  9. "github.com/tendermint/tendermint/internal/p2p"
  10. sm "github.com/tendermint/tendermint/internal/state"
  11. "github.com/tendermint/tendermint/internal/store"
  12. "github.com/tendermint/tendermint/libs/log"
  13. "github.com/tendermint/tendermint/libs/service"
  14. tmsync "github.com/tendermint/tendermint/libs/sync"
  15. bcproto "github.com/tendermint/tendermint/proto/tendermint/blocksync"
  16. "github.com/tendermint/tendermint/types"
  17. )
  18. var _ service.Service = (*Reactor)(nil)
  19. const (
  20. // BlockSyncChannel is a channel for blocks and status updates
  21. BlockSyncChannel = p2p.ChannelID(0x40)
  22. trySyncIntervalMS = 10
  23. // ask for best height every 10s
  24. statusUpdateIntervalSeconds = 10
  25. // check if we should switch to consensus reactor
  26. switchToConsensusIntervalSeconds = 1
  27. // switch to consensus after this duration of inactivity
  28. syncTimeout = 60 * time.Second
  29. )
  30. func GetChannelDescriptor() *p2p.ChannelDescriptor {
  31. return &p2p.ChannelDescriptor{
  32. ID: BlockSyncChannel,
  33. MessageType: new(bcproto.Message),
  34. Priority: 5,
  35. SendQueueCapacity: 1000,
  36. RecvBufferCapacity: 1024,
  37. RecvMessageCapacity: MaxMsgSize,
  38. }
  39. }
  40. type consensusReactor interface {
  41. // For when we switch from block sync reactor to the consensus
  42. // machine.
  43. SwitchToConsensus(ctx context.Context, state sm.State, skipWAL bool)
  44. }
  45. type peerError struct {
  46. err error
  47. peerID types.NodeID
  48. }
  49. func (e peerError) Error() string {
  50. return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error())
  51. }
  52. // Reactor handles long-term catchup syncing.
  53. type Reactor struct {
  54. service.BaseService
  55. logger log.Logger
  56. // immutable
  57. initialState sm.State
  58. blockExec *sm.BlockExecutor
  59. store *store.BlockStore
  60. pool *BlockPool
  61. consReactor consensusReactor
  62. blockSync *tmsync.AtomicBool
  63. blockSyncCh *p2p.Channel
  64. // blockSyncOutBridgeCh defines a channel that acts as a bridge between sending Envelope
  65. // messages that the reactor will consume in processBlockSyncCh and receiving messages
  66. // from the peer updates channel and other goroutines. We do this instead of directly
  67. // sending on blockSyncCh.Out to avoid race conditions in the case where other goroutines
  68. // send Envelopes directly to the to blockSyncCh.Out channel, since processBlockSyncCh
  69. // may close the blockSyncCh.Out channel at the same time that other goroutines send to
  70. // blockSyncCh.Out.
  71. blockSyncOutBridgeCh chan p2p.Envelope
  72. peerUpdates *p2p.PeerUpdates
  73. requestsCh <-chan BlockRequest
  74. errorsCh <-chan peerError
  75. // poolWG is used to synchronize the graceful shutdown of the poolRoutine and
  76. // requestRoutine spawned goroutines when stopping the reactor and before
  77. // stopping the p2p Channel(s).
  78. poolWG sync.WaitGroup
  79. metrics *consensus.Metrics
  80. syncStartTime time.Time
  81. }
  82. // NewReactor returns new reactor instance.
  83. func NewReactor(
  84. logger log.Logger,
  85. state sm.State,
  86. blockExec *sm.BlockExecutor,
  87. store *store.BlockStore,
  88. consReactor consensusReactor,
  89. blockSyncCh *p2p.Channel,
  90. peerUpdates *p2p.PeerUpdates,
  91. blockSync bool,
  92. metrics *consensus.Metrics,
  93. ) (*Reactor, error) {
  94. if state.LastBlockHeight != store.Height() {
  95. return nil, fmt.Errorf("state (%v) and store (%v) height mismatch", state.LastBlockHeight, store.Height())
  96. }
  97. startHeight := store.Height() + 1
  98. if startHeight == 1 {
  99. startHeight = state.InitialHeight
  100. }
  101. requestsCh := make(chan BlockRequest, maxTotalRequesters)
  102. errorsCh := make(chan peerError, maxPeerErrBuffer) // NOTE: The capacity should be larger than the peer count.
  103. r := &Reactor{
  104. logger: logger,
  105. initialState: state,
  106. blockExec: blockExec,
  107. store: store,
  108. pool: NewBlockPool(logger, startHeight, requestsCh, errorsCh),
  109. consReactor: consReactor,
  110. blockSync: tmsync.NewBool(blockSync),
  111. requestsCh: requestsCh,
  112. errorsCh: errorsCh,
  113. blockSyncCh: blockSyncCh,
  114. blockSyncOutBridgeCh: make(chan p2p.Envelope),
  115. peerUpdates: peerUpdates,
  116. metrics: metrics,
  117. syncStartTime: time.Time{},
  118. }
  119. r.BaseService = *service.NewBaseService(logger, "BlockSync", r)
  120. return r, nil
  121. }
  122. // OnStart starts separate go routines for each p2p Channel and listens for
  123. // envelopes on each. In addition, it also listens for peer updates and handles
  124. // messages on that p2p channel accordingly. The caller must be sure to execute
  125. // OnStop to ensure the outbound p2p Channels are closed.
  126. //
  127. // If blockSync is enabled, we also start the pool and the pool processing
  128. // goroutine. If the pool fails to start, an error is returned.
  129. func (r *Reactor) OnStart(ctx context.Context) error {
  130. if r.blockSync.IsSet() {
  131. if err := r.pool.Start(ctx); err != nil {
  132. return err
  133. }
  134. r.poolWG.Add(1)
  135. go r.requestRoutine(ctx)
  136. r.poolWG.Add(1)
  137. go r.poolRoutine(ctx, false)
  138. }
  139. go r.processBlockSyncCh(ctx)
  140. go r.processPeerUpdates(ctx)
  141. return nil
  142. }
  143. // OnStop stops the reactor by signaling to all spawned goroutines to exit and
  144. // blocking until they all exit.
  145. func (r *Reactor) OnStop() {
  146. if r.blockSync.IsSet() {
  147. if err := r.pool.Stop(); err != nil {
  148. r.logger.Error("failed to stop pool", "err", err)
  149. }
  150. }
  151. // wait for the poolRoutine and requestRoutine goroutines to gracefully exit
  152. r.poolWG.Wait()
  153. }
  154. // respondToPeer loads a block and sends it to the requesting peer, if we have it.
  155. // Otherwise, we'll respond saying we do not have it.
  156. func (r *Reactor) respondToPeer(msg *bcproto.BlockRequest, peerID types.NodeID) {
  157. block := r.store.LoadBlock(msg.Height)
  158. if block != nil {
  159. blockProto, err := block.ToProto()
  160. if err != nil {
  161. r.logger.Error("failed to convert msg to protobuf", "err", err)
  162. return
  163. }
  164. r.blockSyncCh.Out <- p2p.Envelope{
  165. To: peerID,
  166. Message: &bcproto.BlockResponse{Block: blockProto},
  167. }
  168. return
  169. }
  170. r.logger.Info("peer requesting a block we do not have", "peer", peerID, "height", msg.Height)
  171. r.blockSyncCh.Out <- p2p.Envelope{
  172. To: peerID,
  173. Message: &bcproto.NoBlockResponse{Height: msg.Height},
  174. }
  175. }
  176. // handleBlockSyncMessage handles envelopes sent from peers on the
  177. // BlockSyncChannel. It returns an error only if the Envelope.Message is unknown
  178. // for this channel. This should never be called outside of handleMessage.
  179. func (r *Reactor) handleBlockSyncMessage(envelope p2p.Envelope) error {
  180. logger := r.logger.With("peer", envelope.From)
  181. switch msg := envelope.Message.(type) {
  182. case *bcproto.BlockRequest:
  183. r.respondToPeer(msg, envelope.From)
  184. case *bcproto.BlockResponse:
  185. block, err := types.BlockFromProto(msg.Block)
  186. if err != nil {
  187. logger.Error("failed to convert block from proto", "err", err)
  188. return err
  189. }
  190. r.pool.AddBlock(envelope.From, block, block.Size())
  191. case *bcproto.StatusRequest:
  192. r.blockSyncCh.Out <- p2p.Envelope{
  193. To: envelope.From,
  194. Message: &bcproto.StatusResponse{
  195. Height: r.store.Height(),
  196. Base: r.store.Base(),
  197. },
  198. }
  199. case *bcproto.StatusResponse:
  200. r.pool.SetPeerRange(envelope.From, msg.Base, msg.Height)
  201. case *bcproto.NoBlockResponse:
  202. logger.Debug("peer does not have the requested block", "height", msg.Height)
  203. default:
  204. return fmt.Errorf("received unknown message: %T", msg)
  205. }
  206. return nil
  207. }
  208. // handleMessage handles an Envelope sent from a peer on a specific p2p Channel.
  209. // It will handle errors and any possible panics gracefully. A caller can handle
  210. // any error returned by sending a PeerError on the respective channel.
  211. func (r *Reactor) handleMessage(chID p2p.ChannelID, envelope p2p.Envelope) (err error) {
  212. defer func() {
  213. if e := recover(); e != nil {
  214. err = fmt.Errorf("panic in processing message: %v", e)
  215. r.logger.Error(
  216. "recovering from processing message panic",
  217. "err", err,
  218. "stack", string(debug.Stack()),
  219. )
  220. }
  221. }()
  222. r.logger.Debug("received message", "message", envelope.Message, "peer", envelope.From)
  223. switch chID {
  224. case BlockSyncChannel:
  225. err = r.handleBlockSyncMessage(envelope)
  226. default:
  227. err = fmt.Errorf("unknown channel ID (%d) for envelope (%v)", chID, envelope)
  228. }
  229. return err
  230. }
  231. // processBlockSyncCh initiates a blocking process where we listen for and handle
  232. // envelopes on the BlockSyncChannel and blockSyncOutBridgeCh. Any error encountered during
  233. // message execution will result in a PeerError being sent on the BlockSyncChannel.
  234. // When the reactor is stopped, we will catch the signal and close the p2p Channel
  235. // gracefully.
  236. func (r *Reactor) processBlockSyncCh(ctx context.Context) {
  237. for {
  238. select {
  239. case <-ctx.Done():
  240. r.logger.Debug("stopped listening on block sync channel; closing...")
  241. return
  242. case envelope := <-r.blockSyncCh.In:
  243. if err := r.handleMessage(r.blockSyncCh.ID, envelope); err != nil {
  244. r.logger.Error("failed to process message", "ch_id", r.blockSyncCh.ID, "envelope", envelope, "err", err)
  245. if serr := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  246. NodeID: envelope.From,
  247. Err: err,
  248. }); serr != nil {
  249. return
  250. }
  251. }
  252. case envelope := <-r.blockSyncOutBridgeCh:
  253. r.blockSyncCh.Out <- envelope
  254. }
  255. }
  256. }
  257. // processPeerUpdate processes a PeerUpdate.
  258. func (r *Reactor) processPeerUpdate(peerUpdate p2p.PeerUpdate) {
  259. r.logger.Debug("received peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)
  260. // XXX: Pool#RedoRequest can sometimes give us an empty peer.
  261. if len(peerUpdate.NodeID) == 0 {
  262. return
  263. }
  264. switch peerUpdate.Status {
  265. case p2p.PeerStatusUp:
  266. // send a status update the newly added peer
  267. r.blockSyncOutBridgeCh <- p2p.Envelope{
  268. To: peerUpdate.NodeID,
  269. Message: &bcproto.StatusResponse{
  270. Base: r.store.Base(),
  271. Height: r.store.Height(),
  272. },
  273. }
  274. case p2p.PeerStatusDown:
  275. r.pool.RemovePeer(peerUpdate.NodeID)
  276. }
  277. }
  278. // processPeerUpdates initiates a blocking process where we listen for and handle
  279. // PeerUpdate messages. When the reactor is stopped, we will catch the signal and
  280. // close the p2p PeerUpdatesCh gracefully.
  281. func (r *Reactor) processPeerUpdates(ctx context.Context) {
  282. for {
  283. select {
  284. case <-ctx.Done():
  285. r.logger.Debug("stopped listening on peer updates channel; closing...")
  286. return
  287. case peerUpdate := <-r.peerUpdates.Updates():
  288. r.processPeerUpdate(peerUpdate)
  289. }
  290. }
  291. }
  292. // SwitchToBlockSync is called by the state sync reactor when switching to fast
  293. // sync.
  294. func (r *Reactor) SwitchToBlockSync(ctx context.Context, state sm.State) error {
  295. r.blockSync.Set()
  296. r.initialState = state
  297. r.pool.height = state.LastBlockHeight + 1
  298. if err := r.pool.Start(ctx); err != nil {
  299. return err
  300. }
  301. r.syncStartTime = time.Now()
  302. r.poolWG.Add(1)
  303. go r.requestRoutine(ctx)
  304. r.poolWG.Add(1)
  305. go r.poolRoutine(ctx, true)
  306. return nil
  307. }
  308. func (r *Reactor) requestRoutine(ctx context.Context) {
  309. statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second)
  310. defer statusUpdateTicker.Stop()
  311. defer r.poolWG.Done()
  312. for {
  313. select {
  314. case <-ctx.Done():
  315. return
  316. case request := <-r.requestsCh:
  317. r.blockSyncOutBridgeCh <- p2p.Envelope{
  318. To: request.PeerID,
  319. Message: &bcproto.BlockRequest{Height: request.Height},
  320. }
  321. case pErr := <-r.errorsCh:
  322. if err := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  323. NodeID: pErr.peerID,
  324. Err: pErr.err,
  325. }); err != nil {
  326. return
  327. }
  328. case <-statusUpdateTicker.C:
  329. r.poolWG.Add(1)
  330. go func() {
  331. defer r.poolWG.Done()
  332. select {
  333. case r.blockSyncOutBridgeCh <- p2p.Envelope{
  334. Broadcast: true,
  335. Message: &bcproto.StatusRequest{},
  336. }:
  337. case <-ctx.Done():
  338. }
  339. }()
  340. }
  341. }
  342. }
  343. // poolRoutine handles messages from the poolReactor telling the reactor what to
  344. // do.
  345. //
  346. // NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
  347. func (r *Reactor) poolRoutine(ctx context.Context, stateSynced bool) {
  348. var (
  349. trySyncTicker = time.NewTicker(trySyncIntervalMS * time.Millisecond)
  350. switchToConsensusTicker = time.NewTicker(switchToConsensusIntervalSeconds * time.Second)
  351. blocksSynced = uint64(0)
  352. chainID = r.initialState.ChainID
  353. state = r.initialState
  354. lastHundred = time.Now()
  355. lastRate = 0.0
  356. didProcessCh = make(chan struct{}, 1)
  357. )
  358. defer trySyncTicker.Stop()
  359. defer switchToConsensusTicker.Stop()
  360. defer r.poolWG.Done()
  361. FOR_LOOP:
  362. for {
  363. select {
  364. case <-switchToConsensusTicker.C:
  365. var (
  366. height, numPending, lenRequesters = r.pool.GetStatus()
  367. lastAdvance = r.pool.LastAdvance()
  368. )
  369. r.logger.Debug(
  370. "consensus ticker",
  371. "num_pending", numPending,
  372. "total", lenRequesters,
  373. "height", height,
  374. )
  375. switch {
  376. case r.pool.IsCaughtUp():
  377. r.logger.Info("switching to consensus reactor", "height", height)
  378. case time.Since(lastAdvance) > syncTimeout:
  379. r.logger.Error("no progress since last advance", "last_advance", lastAdvance)
  380. default:
  381. r.logger.Info(
  382. "not caught up yet",
  383. "height", height,
  384. "max_peer_height", r.pool.MaxPeerHeight(),
  385. "timeout_in", syncTimeout-time.Since(lastAdvance),
  386. )
  387. continue
  388. }
  389. if err := r.pool.Stop(); err != nil {
  390. r.logger.Error("failed to stop pool", "err", err)
  391. }
  392. r.blockSync.UnSet()
  393. if r.consReactor != nil {
  394. r.consReactor.SwitchToConsensus(ctx, state, blocksSynced > 0 || stateSynced)
  395. }
  396. break FOR_LOOP
  397. case <-trySyncTicker.C:
  398. select {
  399. case didProcessCh <- struct{}{}:
  400. default:
  401. }
  402. case <-didProcessCh:
  403. // NOTE: It is a subtle mistake to process more than a single block at a
  404. // time (e.g. 10) here, because we only send one BlockRequest per loop
  405. // iteration. The ratio mismatch can result in starving of blocks, i.e. a
  406. // sudden burst of requests and responses, and repeat. Consequently, it is
  407. // better to split these routines rather than coupling them as it is
  408. // written here.
  409. //
  410. // TODO: Uncouple from request routine.
  411. // see if there are any blocks to sync
  412. first, second := r.pool.PeekTwoBlocks()
  413. if first == nil || second == nil {
  414. // we need both to sync the first block
  415. continue FOR_LOOP
  416. } else {
  417. // try again quickly next loop
  418. didProcessCh <- struct{}{}
  419. }
  420. var (
  421. firstParts = first.MakePartSet(types.BlockPartSizeBytes)
  422. firstPartSetHeader = firstParts.Header()
  423. firstID = types.BlockID{Hash: first.Hash(), PartSetHeader: firstPartSetHeader}
  424. )
  425. // Finally, verify the first block using the second's commit.
  426. //
  427. // NOTE: We can probably make this more efficient, but note that calling
  428. // first.Hash() doesn't verify the tx contents, so MakePartSet() is
  429. // currently necessary.
  430. err := state.Validators.VerifyCommitLight(chainID, firstID, first.Height, second.LastCommit)
  431. if err != nil {
  432. err = fmt.Errorf("invalid last commit: %w", err)
  433. r.logger.Error(
  434. err.Error(),
  435. "last_commit", second.LastCommit,
  436. "block_id", firstID,
  437. "height", first.Height,
  438. )
  439. // NOTE: We've already removed the peer's request, but we still need
  440. // to clean up the rest.
  441. peerID := r.pool.RedoRequest(first.Height)
  442. if serr := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  443. NodeID: peerID,
  444. Err: err,
  445. }); serr != nil {
  446. break FOR_LOOP
  447. }
  448. peerID2 := r.pool.RedoRequest(second.Height)
  449. if peerID2 != peerID {
  450. if serr := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  451. NodeID: peerID2,
  452. Err: err,
  453. }); serr != nil {
  454. break FOR_LOOP
  455. }
  456. }
  457. continue FOR_LOOP
  458. } else {
  459. r.pool.PopRequest()
  460. // TODO: batch saves so we do not persist to disk every block
  461. r.store.SaveBlock(first, firstParts, second.LastCommit)
  462. var err error
  463. // TODO: Same thing for app - but we would need a way to get the hash
  464. // without persisting the state.
  465. state, err = r.blockExec.ApplyBlock(ctx, state, firstID, first)
  466. if err != nil {
  467. // TODO: This is bad, are we zombie?
  468. panic(fmt.Sprintf("failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))
  469. }
  470. r.metrics.RecordConsMetrics(first)
  471. blocksSynced++
  472. if blocksSynced%100 == 0 {
  473. lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
  474. r.logger.Info(
  475. "block sync rate",
  476. "height", r.pool.height,
  477. "max_peer_height", r.pool.MaxPeerHeight(),
  478. "blocks/s", lastRate,
  479. )
  480. lastHundred = time.Now()
  481. }
  482. }
  483. continue FOR_LOOP
  484. case <-ctx.Done():
  485. break FOR_LOOP
  486. case <-r.pool.exitedCh:
  487. break FOR_LOOP
  488. }
  489. }
  490. }
  491. func (r *Reactor) GetMaxPeerBlockHeight() int64 {
  492. return r.pool.MaxPeerHeight()
  493. }
  494. func (r *Reactor) GetTotalSyncedTime() time.Duration {
  495. if !r.blockSync.IsSet() || r.syncStartTime.IsZero() {
  496. return time.Duration(0)
  497. }
  498. return time.Since(r.syncStartTime)
  499. }
  500. func (r *Reactor) GetRemainingSyncTime() time.Duration {
  501. if !r.blockSync.IsSet() {
  502. return time.Duration(0)
  503. }
  504. targetSyncs := r.pool.targetSyncBlocks()
  505. currentSyncs := r.store.Height() - r.pool.startHeight + 1
  506. lastSyncRate := r.pool.getLastSyncRate()
  507. if currentSyncs < 0 || lastSyncRate < 0.001 {
  508. return time.Duration(0)
  509. }
  510. remain := float64(targetSyncs-currentSyncs) / lastSyncRate
  511. return time.Duration(int64(remain * float64(time.Second)))
  512. }