You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

632 lines
17 KiB

cleanup: Reduce and normalize import path aliasing. (#6975) The code in the Tendermint repository makes heavy use of import aliasing. This is made necessary by our extensive reuse of common base package names, and by repetition of similar names across different subdirectories. Unfortunately we have not been very consistent about which packages we alias in various circumstances, and the aliases we use vary. In the spirit of the advice in the style guide and https://github.com/golang/go/wiki/CodeReviewComments#imports, this change makes an effort to clean up and normalize import aliasing. This change makes no API or behavioral changes. It is a pure cleanup intended to help make the code more readable to developers (including myself) trying to understand what is being imported where. Only unexported names have been modified, and the changes were generated and applied mechanically with gofmt -r and comby, respecting the lexical and syntactic rules of Go. Even so, I did not fix every inconsistency. Where the changes would be too disruptive, I left them alone. The principles I followed in this cleanup are: - Remove aliases that restate the package name. - Remove aliases where the base package name is unambiguous. - Move overly-terse abbreviations from the import to the usage site. - Fix lexical issues (remove underscores, remove capitalization). - Fix import groupings to more closely match the style guide. - Group blank (side-effecting) imports and ensure they are commented. - Add aliases to multiple imports with the same base package name.
3 years ago
cleanup: Reduce and normalize import path aliasing. (#6975) The code in the Tendermint repository makes heavy use of import aliasing. This is made necessary by our extensive reuse of common base package names, and by repetition of similar names across different subdirectories. Unfortunately we have not been very consistent about which packages we alias in various circumstances, and the aliases we use vary. In the spirit of the advice in the style guide and https://github.com/golang/go/wiki/CodeReviewComments#imports, this change makes an effort to clean up and normalize import aliasing. This change makes no API or behavioral changes. It is a pure cleanup intended to help make the code more readable to developers (including myself) trying to understand what is being imported where. Only unexported names have been modified, and the changes were generated and applied mechanically with gofmt -r and comby, respecting the lexical and syntactic rules of Go. Even so, I did not fix every inconsistency. Where the changes would be too disruptive, I left them alone. The principles I followed in this cleanup are: - Remove aliases that restate the package name. - Remove aliases where the base package name is unambiguous. - Move overly-terse abbreviations from the import to the usage site. - Fix lexical issues (remove underscores, remove capitalization). - Fix import groupings to more closely match the style guide. - Group blank (side-effecting) imports and ensure they are commented. - Add aliases to multiple imports with the same base package name.
3 years ago
  1. package blocksync
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "runtime/debug"
  7. "sync/atomic"
  8. "time"
  9. "github.com/tendermint/tendermint/internal/consensus"
  10. "github.com/tendermint/tendermint/internal/eventbus"
  11. "github.com/tendermint/tendermint/internal/p2p"
  12. sm "github.com/tendermint/tendermint/internal/state"
  13. "github.com/tendermint/tendermint/internal/store"
  14. "github.com/tendermint/tendermint/libs/log"
  15. "github.com/tendermint/tendermint/libs/service"
  16. bcproto "github.com/tendermint/tendermint/proto/tendermint/blocksync"
  17. "github.com/tendermint/tendermint/types"
  18. )
  19. var _ service.Service = (*Reactor)(nil)
  20. const (
  21. // BlockSyncChannel is a channel for blocks and status updates
  22. BlockSyncChannel = p2p.ChannelID(0x40)
  23. trySyncIntervalMS = 10
  24. // ask for best height every 10s
  25. statusUpdateIntervalSeconds = 10
  26. // check if we should switch to consensus reactor
  27. switchToConsensusIntervalSeconds = 1
  28. // switch to consensus after this duration of inactivity
  29. syncTimeout = 60 * time.Second
  30. )
  31. func GetChannelDescriptor() *p2p.ChannelDescriptor {
  32. return &p2p.ChannelDescriptor{
  33. ID: BlockSyncChannel,
  34. MessageType: new(bcproto.Message),
  35. Priority: 5,
  36. SendQueueCapacity: 1000,
  37. RecvBufferCapacity: 1024,
  38. RecvMessageCapacity: MaxMsgSize,
  39. }
  40. }
  41. type consensusReactor interface {
  42. // For when we switch from block sync reactor to the consensus
  43. // machine.
  44. SwitchToConsensus(ctx context.Context, state sm.State, skipWAL bool)
  45. }
  46. type peerError struct {
  47. err error
  48. peerID types.NodeID
  49. }
  50. func (e peerError) Error() string {
  51. return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error())
  52. }
  53. // Reactor handles long-term catchup syncing.
  54. type Reactor struct {
  55. service.BaseService
  56. logger log.Logger
  57. // immutable
  58. initialState sm.State
  59. // store
  60. stateStore sm.Store
  61. blockExec *sm.BlockExecutor
  62. store *store.BlockStore
  63. pool *BlockPool
  64. consReactor consensusReactor
  65. blockSync *atomicBool
  66. blockSyncCh *p2p.Channel
  67. peerUpdates *p2p.PeerUpdates
  68. requestsCh <-chan BlockRequest
  69. errorsCh <-chan peerError
  70. metrics *consensus.Metrics
  71. eventBus *eventbus.EventBus
  72. syncStartTime time.Time
  73. }
  74. // NewReactor returns new reactor instance.
  75. func NewReactor(
  76. ctx context.Context,
  77. logger log.Logger,
  78. stateStore sm.Store,
  79. blockExec *sm.BlockExecutor,
  80. store *store.BlockStore,
  81. consReactor consensusReactor,
  82. channelCreator p2p.ChannelCreator,
  83. peerUpdates *p2p.PeerUpdates,
  84. blockSync bool,
  85. metrics *consensus.Metrics,
  86. eventBus *eventbus.EventBus,
  87. ) (*Reactor, error) {
  88. blockSyncCh, err := channelCreator(ctx, GetChannelDescriptor())
  89. if err != nil {
  90. return nil, err
  91. }
  92. r := &Reactor{
  93. logger: logger,
  94. stateStore: stateStore,
  95. blockExec: blockExec,
  96. store: store,
  97. consReactor: consReactor,
  98. blockSync: newAtomicBool(blockSync),
  99. blockSyncCh: blockSyncCh,
  100. peerUpdates: peerUpdates,
  101. metrics: metrics,
  102. eventBus: eventBus,
  103. }
  104. r.BaseService = *service.NewBaseService(logger, "BlockSync", r)
  105. return r, nil
  106. }
  107. // OnStart starts separate go routines for each p2p Channel and listens for
  108. // envelopes on each. In addition, it also listens for peer updates and handles
  109. // messages on that p2p channel accordingly. The caller must be sure to execute
  110. // OnStop to ensure the outbound p2p Channels are closed.
  111. //
  112. // If blockSync is enabled, we also start the pool and the pool processing
  113. // goroutine. If the pool fails to start, an error is returned.
  114. func (r *Reactor) OnStart(ctx context.Context) error {
  115. state, err := r.stateStore.Load()
  116. if err != nil {
  117. return err
  118. }
  119. r.initialState = state
  120. if state.LastBlockHeight != r.store.Height() {
  121. return fmt.Errorf("state (%v) and store (%v) height mismatch", state.LastBlockHeight, r.store.Height())
  122. }
  123. startHeight := r.store.Height() + 1
  124. if startHeight == 1 {
  125. startHeight = state.InitialHeight
  126. }
  127. requestsCh := make(chan BlockRequest, maxTotalRequesters)
  128. errorsCh := make(chan peerError, maxPeerErrBuffer) // NOTE: The capacity should be larger than the peer count.
  129. r.pool = NewBlockPool(r.logger, startHeight, requestsCh, errorsCh)
  130. r.requestsCh = requestsCh
  131. r.errorsCh = errorsCh
  132. if r.blockSync.IsSet() {
  133. if err := r.pool.Start(ctx); err != nil {
  134. return err
  135. }
  136. go r.requestRoutine(ctx)
  137. go r.poolRoutine(ctx, false)
  138. }
  139. go r.processBlockSyncCh(ctx)
  140. go r.processPeerUpdates(ctx)
  141. return nil
  142. }
  143. // OnStop stops the reactor by signaling to all spawned goroutines to exit and
  144. // blocking until they all exit.
  145. func (r *Reactor) OnStop() {
  146. if r.blockSync.IsSet() {
  147. r.pool.Stop()
  148. }
  149. }
  150. // respondToPeer loads a block and sends it to the requesting peer, if we have it.
  151. // Otherwise, we'll respond saying we do not have it.
  152. func (r *Reactor) respondToPeer(ctx context.Context, msg *bcproto.BlockRequest, peerID types.NodeID) error {
  153. block := r.store.LoadBlock(msg.Height)
  154. if block != nil {
  155. blockProto, err := block.ToProto()
  156. if err != nil {
  157. r.logger.Error("failed to convert msg to protobuf", "err", err)
  158. return err
  159. }
  160. return r.blockSyncCh.Send(ctx, p2p.Envelope{
  161. To: peerID,
  162. Message: &bcproto.BlockResponse{Block: blockProto},
  163. })
  164. }
  165. r.logger.Info("peer requesting a block we do not have", "peer", peerID, "height", msg.Height)
  166. return r.blockSyncCh.Send(ctx, p2p.Envelope{
  167. To: peerID,
  168. Message: &bcproto.NoBlockResponse{Height: msg.Height},
  169. })
  170. }
  171. // handleBlockSyncMessage handles envelopes sent from peers on the
  172. // BlockSyncChannel. It returns an error only if the Envelope.Message is unknown
  173. // for this channel. This should never be called outside of handleMessage.
  174. func (r *Reactor) handleBlockSyncMessage(ctx context.Context, envelope *p2p.Envelope) error {
  175. logger := r.logger.With("peer", envelope.From)
  176. switch msg := envelope.Message.(type) {
  177. case *bcproto.BlockRequest:
  178. return r.respondToPeer(ctx, msg, envelope.From)
  179. case *bcproto.BlockResponse:
  180. block, err := types.BlockFromProto(msg.Block)
  181. if err != nil {
  182. logger.Error("failed to convert block from proto", "err", err)
  183. return err
  184. }
  185. r.pool.AddBlock(envelope.From, block, block.Size())
  186. case *bcproto.StatusRequest:
  187. return r.blockSyncCh.Send(ctx, p2p.Envelope{
  188. To: envelope.From,
  189. Message: &bcproto.StatusResponse{
  190. Height: r.store.Height(),
  191. Base: r.store.Base(),
  192. },
  193. })
  194. case *bcproto.StatusResponse:
  195. r.pool.SetPeerRange(envelope.From, msg.Base, msg.Height)
  196. case *bcproto.NoBlockResponse:
  197. logger.Debug("peer does not have the requested block", "height", msg.Height)
  198. default:
  199. return fmt.Errorf("received unknown message: %T", msg)
  200. }
  201. return nil
  202. }
  203. // handleMessage handles an Envelope sent from a peer on a specific p2p Channel.
  204. // It will handle errors and any possible panics gracefully. A caller can handle
  205. // any error returned by sending a PeerError on the respective channel.
  206. func (r *Reactor) handleMessage(ctx context.Context, chID p2p.ChannelID, envelope *p2p.Envelope) (err error) {
  207. defer func() {
  208. if e := recover(); e != nil {
  209. err = fmt.Errorf("panic in processing message: %v", e)
  210. r.logger.Error(
  211. "recovering from processing message panic",
  212. "err", err,
  213. "stack", string(debug.Stack()),
  214. )
  215. }
  216. }()
  217. r.logger.Debug("received message", "message", envelope.Message, "peer", envelope.From)
  218. switch chID {
  219. case BlockSyncChannel:
  220. err = r.handleBlockSyncMessage(ctx, envelope)
  221. default:
  222. err = fmt.Errorf("unknown channel ID (%d) for envelope (%v)", chID, envelope)
  223. }
  224. return err
  225. }
  226. // processBlockSyncCh initiates a blocking process where we listen for and handle
  227. // envelopes on the BlockSyncChannel and blockSyncOutBridgeCh. Any error encountered during
  228. // message execution will result in a PeerError being sent on the BlockSyncChannel.
  229. // When the reactor is stopped, we will catch the signal and close the p2p Channel
  230. // gracefully.
  231. func (r *Reactor) processBlockSyncCh(ctx context.Context) {
  232. iter := r.blockSyncCh.Receive(ctx)
  233. for iter.Next(ctx) {
  234. envelope := iter.Envelope()
  235. if err := r.handleMessage(ctx, r.blockSyncCh.ID, envelope); err != nil {
  236. if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
  237. return
  238. }
  239. r.logger.Error("failed to process message", "ch_id", r.blockSyncCh.ID, "envelope", envelope, "err", err)
  240. if serr := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  241. NodeID: envelope.From,
  242. Err: err,
  243. }); serr != nil {
  244. return
  245. }
  246. }
  247. }
  248. }
  249. // processPeerUpdate processes a PeerUpdate.
  250. func (r *Reactor) processPeerUpdate(ctx context.Context, peerUpdate p2p.PeerUpdate) {
  251. r.logger.Debug("received peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)
  252. // XXX: Pool#RedoRequest can sometimes give us an empty peer.
  253. if len(peerUpdate.NodeID) == 0 {
  254. return
  255. }
  256. switch peerUpdate.Status {
  257. case p2p.PeerStatusUp:
  258. // send a status update the newly added peer
  259. if err := r.blockSyncCh.Send(ctx, p2p.Envelope{
  260. To: peerUpdate.NodeID,
  261. Message: &bcproto.StatusResponse{
  262. Base: r.store.Base(),
  263. Height: r.store.Height(),
  264. },
  265. }); err != nil {
  266. r.pool.RemovePeer(peerUpdate.NodeID)
  267. if err := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  268. NodeID: peerUpdate.NodeID,
  269. Err: err,
  270. }); err != nil {
  271. return
  272. }
  273. }
  274. case p2p.PeerStatusDown:
  275. r.pool.RemovePeer(peerUpdate.NodeID)
  276. }
  277. }
  278. // processPeerUpdates initiates a blocking process where we listen for and handle
  279. // PeerUpdate messages. When the reactor is stopped, we will catch the signal and
  280. // close the p2p PeerUpdatesCh gracefully.
  281. func (r *Reactor) processPeerUpdates(ctx context.Context) {
  282. for {
  283. select {
  284. case <-ctx.Done():
  285. return
  286. case peerUpdate := <-r.peerUpdates.Updates():
  287. r.processPeerUpdate(ctx, peerUpdate)
  288. }
  289. }
  290. }
  291. // SwitchToBlockSync is called by the state sync reactor when switching to fast
  292. // sync.
  293. func (r *Reactor) SwitchToBlockSync(ctx context.Context, state sm.State) error {
  294. r.blockSync.Set()
  295. r.initialState = state
  296. r.pool.height = state.LastBlockHeight + 1
  297. if err := r.pool.Start(ctx); err != nil {
  298. return err
  299. }
  300. r.syncStartTime = time.Now()
  301. go r.requestRoutine(ctx)
  302. go r.poolRoutine(ctx, true)
  303. return nil
  304. }
  305. func (r *Reactor) requestRoutine(ctx context.Context) {
  306. statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second)
  307. defer statusUpdateTicker.Stop()
  308. for {
  309. select {
  310. case <-ctx.Done():
  311. return
  312. case request := <-r.requestsCh:
  313. if err := r.blockSyncCh.Send(ctx, p2p.Envelope{
  314. To: request.PeerID,
  315. Message: &bcproto.BlockRequest{Height: request.Height},
  316. }); err != nil {
  317. if err := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  318. NodeID: request.PeerID,
  319. Err: err,
  320. }); err != nil {
  321. return
  322. }
  323. }
  324. case pErr := <-r.errorsCh:
  325. if err := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  326. NodeID: pErr.peerID,
  327. Err: pErr.err,
  328. }); err != nil {
  329. return
  330. }
  331. case <-statusUpdateTicker.C:
  332. if err := r.blockSyncCh.Send(ctx, p2p.Envelope{
  333. Broadcast: true,
  334. Message: &bcproto.StatusRequest{},
  335. }); err != nil {
  336. return
  337. }
  338. }
  339. }
  340. }
  341. // poolRoutine handles messages from the poolReactor telling the reactor what to
  342. // do.
  343. //
  344. // NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
  345. func (r *Reactor) poolRoutine(ctx context.Context, stateSynced bool) {
  346. var (
  347. trySyncTicker = time.NewTicker(trySyncIntervalMS * time.Millisecond)
  348. switchToConsensusTicker = time.NewTicker(switchToConsensusIntervalSeconds * time.Second)
  349. blocksSynced = uint64(0)
  350. chainID = r.initialState.ChainID
  351. state = r.initialState
  352. lastHundred = time.Now()
  353. lastRate = 0.0
  354. didProcessCh = make(chan struct{}, 1)
  355. )
  356. defer trySyncTicker.Stop()
  357. defer switchToConsensusTicker.Stop()
  358. for {
  359. select {
  360. case <-ctx.Done():
  361. return
  362. case <-switchToConsensusTicker.C:
  363. var (
  364. height, numPending, lenRequesters = r.pool.GetStatus()
  365. lastAdvance = r.pool.LastAdvance()
  366. )
  367. r.logger.Debug(
  368. "consensus ticker",
  369. "num_pending", numPending,
  370. "total", lenRequesters,
  371. "height", height,
  372. )
  373. switch {
  374. case r.pool.IsCaughtUp():
  375. r.logger.Info("switching to consensus reactor", "height", height)
  376. case time.Since(lastAdvance) > syncTimeout:
  377. r.logger.Error("no progress since last advance", "last_advance", lastAdvance)
  378. default:
  379. r.logger.Info(
  380. "not caught up yet",
  381. "height", height,
  382. "max_peer_height", r.pool.MaxPeerHeight(),
  383. "timeout_in", syncTimeout-time.Since(lastAdvance),
  384. )
  385. continue
  386. }
  387. r.pool.Stop()
  388. r.blockSync.UnSet()
  389. if r.consReactor != nil {
  390. r.consReactor.SwitchToConsensus(ctx, state, blocksSynced > 0 || stateSynced)
  391. }
  392. return
  393. case <-trySyncTicker.C:
  394. select {
  395. case didProcessCh <- struct{}{}:
  396. default:
  397. }
  398. case <-didProcessCh:
  399. // NOTE: It is a subtle mistake to process more than a single block at a
  400. // time (e.g. 10) here, because we only send one BlockRequest per loop
  401. // iteration. The ratio mismatch can result in starving of blocks, i.e. a
  402. // sudden burst of requests and responses, and repeat. Consequently, it is
  403. // better to split these routines rather than coupling them as it is
  404. // written here.
  405. //
  406. // TODO: Uncouple from request routine.
  407. // see if there are any blocks to sync
  408. first, second := r.pool.PeekTwoBlocks()
  409. if first == nil || second == nil {
  410. // we need both to sync the first block
  411. continue
  412. } else {
  413. // try again quickly next loop
  414. didProcessCh <- struct{}{}
  415. }
  416. firstParts, err := first.MakePartSet(types.BlockPartSizeBytes)
  417. if err != nil {
  418. r.logger.Error("failed to make ",
  419. "height", first.Height,
  420. "err", err.Error())
  421. return
  422. }
  423. var (
  424. firstPartSetHeader = firstParts.Header()
  425. firstID = types.BlockID{Hash: first.Hash(), PartSetHeader: firstPartSetHeader}
  426. )
  427. // Finally, verify the first block using the second's commit.
  428. //
  429. // NOTE: We can probably make this more efficient, but note that calling
  430. // first.Hash() doesn't verify the tx contents, so MakePartSet() is
  431. // currently necessary.
  432. if err = state.Validators.VerifyCommitLight(chainID, firstID, first.Height, second.LastCommit); err != nil {
  433. err = fmt.Errorf("invalid last commit: %w", err)
  434. r.logger.Error(
  435. err.Error(),
  436. "last_commit", second.LastCommit,
  437. "block_id", firstID,
  438. "height", first.Height,
  439. )
  440. // NOTE: We've already removed the peer's request, but we still need
  441. // to clean up the rest.
  442. peerID := r.pool.RedoRequest(first.Height)
  443. if serr := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  444. NodeID: peerID,
  445. Err: err,
  446. }); serr != nil {
  447. return
  448. }
  449. peerID2 := r.pool.RedoRequest(second.Height)
  450. if peerID2 != peerID {
  451. if serr := r.blockSyncCh.SendError(ctx, p2p.PeerError{
  452. NodeID: peerID2,
  453. Err: err,
  454. }); serr != nil {
  455. return
  456. }
  457. }
  458. } else {
  459. r.pool.PopRequest()
  460. // TODO: batch saves so we do not persist to disk every block
  461. r.store.SaveBlock(first, firstParts, second.LastCommit)
  462. var err error
  463. // TODO: Same thing for app - but we would need a way to get the hash
  464. // without persisting the state.
  465. state, err = r.blockExec.ApplyBlock(ctx, state, firstID, first)
  466. if err != nil {
  467. // TODO: This is bad, are we zombie?
  468. panic(fmt.Sprintf("failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))
  469. }
  470. r.metrics.RecordConsMetrics(first)
  471. blocksSynced++
  472. if blocksSynced%100 == 0 {
  473. lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
  474. r.logger.Info(
  475. "block sync rate",
  476. "height", r.pool.height,
  477. "max_peer_height", r.pool.MaxPeerHeight(),
  478. "blocks/s", lastRate,
  479. )
  480. lastHundred = time.Now()
  481. }
  482. }
  483. }
  484. }
  485. }
  486. func (r *Reactor) GetMaxPeerBlockHeight() int64 {
  487. return r.pool.MaxPeerHeight()
  488. }
  489. func (r *Reactor) GetTotalSyncedTime() time.Duration {
  490. if !r.blockSync.IsSet() || r.syncStartTime.IsZero() {
  491. return time.Duration(0)
  492. }
  493. return time.Since(r.syncStartTime)
  494. }
  495. func (r *Reactor) GetRemainingSyncTime() time.Duration {
  496. if !r.blockSync.IsSet() {
  497. return time.Duration(0)
  498. }
  499. targetSyncs := r.pool.targetSyncBlocks()
  500. currentSyncs := r.store.Height() - r.pool.startHeight + 1
  501. lastSyncRate := r.pool.getLastSyncRate()
  502. if currentSyncs < 0 || lastSyncRate < 0.001 {
  503. return time.Duration(0)
  504. }
  505. remain := float64(targetSyncs-currentSyncs) / lastSyncRate
  506. return time.Duration(int64(remain * float64(time.Second)))
  507. }
  508. func (r *Reactor) PublishStatus(ctx context.Context, event types.EventDataBlockSyncStatus) error {
  509. if r.eventBus == nil {
  510. return errors.New("event bus is not configured")
  511. }
  512. return r.eventBus.PublishEventBlockSyncStatus(ctx, event)
  513. }
  // atomicBool is a Boolean flag stored in an int32 and accessed through
  // sync/atomic, so it is safe for concurrent use by multiple goroutines.
  // Zero means false; Set stores 1.
  type atomicBool int32

  // newAtomicBool returns a pointer to a new atomicBool holding ok.
  func newAtomicBool(ok bool) *atomicBool {
  	var flag atomicBool
  	if ok {
  		flag.Set()
  	}
  	return &flag
  }

  // Set sets the Boolean to true.
  func (b *atomicBool) Set() {
  	atomic.StoreInt32((*int32)(b), 1)
  }

  // UnSet sets the Boolean to false.
  func (b *atomicBool) UnSet() {
  	atomic.StoreInt32((*int32)(b), 0)
  }

  // IsSet reports whether the Boolean is true (low bit of the stored value set).
  func (b *atomicBool) IsSet() bool {
  	return atomic.LoadInt32((*int32)(b))&1 != 0
  }