zolfa
/
tendermint


								package statesync


								import (

									"bytes"

									"context"

									"errors"

									"fmt"

									"reflect"

									"runtime/debug"

									"sort"

									"sync"

									"time"


									abci "github.com/tendermint/tendermint/abci/types"

									"github.com/tendermint/tendermint/config"

									"github.com/tendermint/tendermint/internal/p2p"

									"github.com/tendermint/tendermint/internal/proxy"

									sm "github.com/tendermint/tendermint/internal/state"

									"github.com/tendermint/tendermint/internal/store"

									"github.com/tendermint/tendermint/libs/log"

									"github.com/tendermint/tendermint/libs/service"

									"github.com/tendermint/tendermint/light"

									"github.com/tendermint/tendermint/light/provider"

									ssproto "github.com/tendermint/tendermint/proto/tendermint/statesync"

									"github.com/tendermint/tendermint/types"

								)


								var (

									_ service.Service = (*Reactor)(nil)

									_ p2p.Wrapper     = (*ssproto.Message)(nil)

								)


								const (

									// SnapshotChannel exchanges snapshot metadata

									SnapshotChannel = p2p.ChannelID(0x60)


									// ChunkChannel exchanges chunk contents

									ChunkChannel = p2p.ChannelID(0x61)


									// LightBlockChannel exchanges light blocks

									LightBlockChannel = p2p.ChannelID(0x62)


									// ParamsChannel exchanges consensus params

									ParamsChannel = p2p.ChannelID(0x63)


									// recentSnapshots is the number of recent snapshots to send and receive per peer.

									recentSnapshots = 10


									// snapshotMsgSize is the maximum size of a snapshotResponseMessage

									snapshotMsgSize = int(4e6) // ~4MB


									// chunkMsgSize is the maximum size of a chunkResponseMessage

									chunkMsgSize = int(16e6) // ~16MB


									// lightBlockMsgSize is the maximum size of a lightBlockResponseMessage

									lightBlockMsgSize = int(1e7) // ~1MB


									// paramMsgSize is the maximum size of a paramsResponseMessage

									paramMsgSize = int(1e5) // ~100kb


									// lightBlockResponseTimeout is how long the dispatcher waits for a peer to

									// return a light block

									lightBlockResponseTimeout = 10 * time.Second


									// consensusParamsResponseTimeout is the time the p2p state provider waits

									// before performing a secondary call

									consensusParamsResponseTimeout = 5 * time.Second


									// maxLightBlockRequestRetries is the amount of retries acceptable before

									// the backfill process aborts

									maxLightBlockRequestRetries = 20

								)


								func GetChannelDescriptors() []*p2p.ChannelDescriptor {

									return []*p2p.ChannelDescriptor{

										{


											ID:                  SnapshotChannel,

											MessageType:         new(ssproto.Message),

											Priority:            6,

											SendQueueCapacity:   10,

											RecvMessageCapacity: snapshotMsgSize,

											RecvBufferCapacity:  128,

										},

										{

											ID:                  ChunkChannel,

											Priority:            3,

											MessageType:         new(ssproto.Message),

											SendQueueCapacity:   4,

											RecvMessageCapacity: chunkMsgSize,

											RecvBufferCapacity:  128,

										},

										{

											ID:                  LightBlockChannel,

											MessageType:         new(ssproto.Message),

											Priority:            5,

											SendQueueCapacity:   10,

											RecvMessageCapacity: lightBlockMsgSize,

											RecvBufferCapacity:  128,

										},

										{

											ID:                  ParamsChannel,

											MessageType:         new(ssproto.Message),

											Priority:            2,

											SendQueueCapacity:   10,

											RecvMessageCapacity: paramMsgSize,

											RecvBufferCapacity:  128,

										},

									}


								}


								// Metricer defines an interface used for the rpc sync info query, please see statesync.metrics

								// for the details.

								type Metricer interface {

									TotalSnapshots() int64

									ChunkProcessAvgTime() time.Duration

									SnapshotHeight() int64

									SnapshotChunksCount() int64

									SnapshotChunksTotal() int64

									BackFilledBlocks() int64

									BackFillBlocksTotal() int64

								}


								// Reactor handles state sync, both restoring snapshots for the local node and

								// serving snapshots for other nodes.

								type Reactor struct {

									service.BaseService

									logger log.Logger


									chainID       string

									initialHeight int64

									cfg           config.StateSyncConfig

									stateStore    sm.Store

									blockStore    *store.BlockStore


									conn        proxy.AppConnSnapshot

									connQuery   proxy.AppConnQuery

									tempDir     string

									snapshotCh  *p2p.Channel

									chunkCh     *p2p.Channel

									blockCh     *p2p.Channel

									paramsCh    *p2p.Channel

									peerUpdates *p2p.PeerUpdates


									// Dispatcher is used to multiplex light block requests and responses over multiple

									// peers used by the p2p state provider and in reverse sync.

									dispatcher *Dispatcher

									peers      *peerList


									// These will only be set when a state sync is in progress. It is used to feed

									// received snapshots and chunks into the syncer and manage incoming and outgoing

									// providers.

									mtx           sync.RWMutex

									syncer        *syncer

									providers     map[types.NodeID]*BlockProvider

									stateProvider StateProvider


									metrics            *Metrics

									backfillBlockTotal int64

									backfilledBlocks   int64

								}


								// NewReactor returns a reference to a new state sync reactor, which implements

								// the service.Service interface. It accepts a logger, connections for snapshots

								// and querying, references to p2p Channels and a channel to listen for peer

								// updates on. Note, the reactor will close all p2p Channels when stopping.

								func NewReactor(

									chainID string,

									initialHeight int64,

									cfg config.StateSyncConfig,

									logger log.Logger,

									conn proxy.AppConnSnapshot,

									connQuery proxy.AppConnQuery,

									snapshotCh, chunkCh, blockCh, paramsCh *p2p.Channel,

									peerUpdates *p2p.PeerUpdates,

									stateStore sm.Store,

									blockStore *store.BlockStore,

									tempDir string,

									ssMetrics *Metrics,

								) *Reactor {

									r := &Reactor{

										logger:        logger,

										chainID:       chainID,

										initialHeight: initialHeight,

										cfg:           cfg,

										conn:          conn,

										connQuery:     connQuery,

										snapshotCh:    snapshotCh,

										chunkCh:       chunkCh,

										blockCh:       blockCh,

										paramsCh:      paramsCh,

										peerUpdates:   peerUpdates,

										tempDir:       tempDir,

										stateStore:    stateStore,

										blockStore:    blockStore,

										peers:         newPeerList(),

										dispatcher:    NewDispatcher(blockCh),

										providers:     make(map[types.NodeID]*BlockProvider),

										metrics:       ssMetrics,

									}


									r.BaseService = *service.NewBaseService(logger, "StateSync", r)

									return r

								}


								// OnStart starts separate go routines for each p2p Channel and listens for

								// envelopes on each. In addition, it also listens for peer updates and handles

								// messages on that p2p channel accordingly. Note, we do not launch a go-routine to

								// handle individual envelopes as to not have to deal with bounding workers or pools.

								// The caller must be sure to execute OnStop to ensure the outbound p2p Channels are

								// closed. No error is returned.

								func (r *Reactor) OnStart(ctx context.Context) error {

									go r.processCh(ctx, r.snapshotCh, "snapshot")

									go r.processCh(ctx, r.chunkCh, "chunk")

									go r.processCh(ctx, r.blockCh, "light block")

									go r.processCh(ctx, r.paramsCh, "consensus params")

									go r.processPeerUpdates(ctx)


									return nil

								}


								// OnStop stops the reactor by signaling to all spawned goroutines to exit and

								// blocking until they all exit.

								func (r *Reactor) OnStop() {

									// tell the dispatcher to stop sending any more requests

									r.dispatcher.Close()

								}


								// Sync runs a state sync, fetching snapshots and providing chunks to the

								// application. At the close of the operation, Sync will bootstrap the state

								// store and persist the commit at that height so that either consensus or

								// blocksync can commence. It will then proceed to backfill the necessary amount

								// of historical blocks before participating in consensus

								func (r *Reactor) Sync(ctx context.Context) (sm.State, error) {

									// We need at least two peers (for cross-referencing of light blocks) before we can

									// begin state sync

									if err := r.waitForEnoughPeers(ctx, 2); err != nil {

										return sm.State{}, err

									}


									r.mtx.Lock()

									if r.syncer != nil {

										r.mtx.Unlock()

										return sm.State{}, errors.New("a state sync is already in progress")

									}


									if err := r.initStateProvider(ctx, r.chainID, r.initialHeight); err != nil {

										r.mtx.Unlock()

										return sm.State{}, err

									}


									r.syncer = newSyncer(

										r.cfg,

										r.logger,

										r.conn,

										r.connQuery,

										r.stateProvider,

										r.snapshotCh,

										r.chunkCh,

										r.tempDir,

										r.metrics,

									)

									r.mtx.Unlock()

									defer func() {

										r.mtx.Lock()

										// reset syncing objects at the close of Sync

										r.syncer = nil

										r.stateProvider = nil

										r.mtx.Unlock()

									}()


									requestSnapshotsHook := func() error {

										// request snapshots from all currently connected peers

										return r.snapshotCh.Send(ctx, p2p.Envelope{

											Broadcast: true,

											Message:   &ssproto.SnapshotsRequest{},

										})

									}


									state, commit, err := r.syncer.SyncAny(ctx, r.cfg.DiscoveryTime, requestSnapshotsHook)

									if err != nil {

										return sm.State{}, err

									}


									err = r.stateStore.Bootstrap(state)

									if err != nil {

										return sm.State{}, fmt.Errorf("failed to bootstrap node with new state: %w", err)

									}


									err = r.blockStore.SaveSeenCommit(state.LastBlockHeight, commit)

									if err != nil {

										return sm.State{}, fmt.Errorf("failed to store last seen commit: %w", err)

									}


									err = r.Backfill(ctx, state)

									if err != nil {

										r.logger.Error("backfill failed. Proceeding optimistically...", "err", err)

									}


									return state, nil

								}


								// Backfill sequentially fetches, verifies and stores light blocks in reverse

								// order. It does not stop verifying blocks until reaching a block with a height

								// and time that is less or equal to the stopHeight and stopTime. The

								// trustedBlockID should be of the header at startHeight.

								func (r *Reactor) Backfill(ctx context.Context, state sm.State) error {

									params := state.ConsensusParams.Evidence

									stopHeight := state.LastBlockHeight - params.MaxAgeNumBlocks

									stopTime := state.LastBlockTime.Add(-params.MaxAgeDuration)

									// ensure that stop height doesn't go below the initial height

									if stopHeight < state.InitialHeight {

										stopHeight = state.InitialHeight

										// this essentially makes stop time a void criteria for termination

										stopTime = state.LastBlockTime

									}

									return r.backfill(

										ctx,

										state.ChainID,

										state.LastBlockHeight,

										stopHeight,

										state.InitialHeight,

										state.LastBlockID,

										stopTime,

									)

								}


								func (r *Reactor) backfill(

									ctx context.Context,

									chainID string,

									startHeight, stopHeight, initialHeight int64,

									trustedBlockID types.BlockID,

									stopTime time.Time,

								) error {

									r.logger.Info("starting backfill process...", "startHeight", startHeight,

										"stopHeight", stopHeight, "stopTime", stopTime, "trustedBlockID", trustedBlockID)


									r.backfillBlockTotal = startHeight - stopHeight + 1

									r.metrics.BackFillBlocksTotal.Set(float64(r.backfillBlockTotal))


									const sleepTime = 1 * time.Second

									var (

										lastValidatorSet *types.ValidatorSet

										lastChangeHeight = startHeight

									)


									queue := newBlockQueue(startHeight, stopHeight, initialHeight, stopTime, maxLightBlockRequestRetries)


									// fetch light blocks across four workers. The aim with deploying concurrent

									// workers is to equate the network messaging time with the verification

									// time. Ideally we want the verification process to never have to be

									// waiting on blocks. If it takes 4s to retrieve a block and 1s to verify

									// it, then steady state involves four workers.

									for i := 0; i < int(r.cfg.Fetchers); i++ {

										ctxWithCancel, cancel := context.WithCancel(ctx)

										defer cancel()

										go func() {

											for {

												select {

												case <-ctx.Done():

													return

												case height := <-queue.nextHeight():

													// pop the next peer of the list to send a request to

													peer := r.peers.Pop(ctx)

													r.logger.Debug("fetching next block", "height", height, "peer", peer)

													subCtx, cancel := context.WithTimeout(ctxWithCancel, lightBlockResponseTimeout)

													defer cancel()

													lb, err := func() (*types.LightBlock, error) {

														defer cancel()

														// request the light block with a timeout

														return r.dispatcher.LightBlock(subCtx, height, peer)

													}()

													// once the peer has returned a value, add it back to the peer list to be used again

													r.peers.Append(peer)

													if errors.Is(err, context.Canceled) {

														return

													}

													if err != nil {

														queue.retry(height)

														if errors.Is(err, errNoConnectedPeers) {

															r.logger.Info("backfill: no connected peers to fetch light blocks from; sleeping...",

																"sleepTime", sleepTime)

															time.Sleep(sleepTime)

														} else {

															// we don't punish the peer as it might just have not responded in time

															r.logger.Info("backfill: error with fetching light block",

																"height", height, "err", err)

														}

														continue

													}

													if lb == nil {

														r.logger.Info("backfill: peer didn't have block, fetching from another peer", "height", height)

														queue.retry(height)

														// As we are fetching blocks backwards, if this node doesn't have the block it likely doesn't

														// have any prior ones, thus we remove it from the peer list.

														r.peers.Remove(peer)

														continue

													}


													// run a validate basic. This checks the validator set and commit

													// hashes line up

													err = lb.ValidateBasic(chainID)

													if err != nil || lb.Height != height {

														r.logger.Info("backfill: fetched light block failed validate basic, removing peer...",

															"err", err, "height", height)

														queue.retry(height)

														if serr := r.blockCh.SendError(ctx, p2p.PeerError{

															NodeID: peer,

															Err:    fmt.Errorf("received invalid light block: %w", err),

														}); serr != nil {

															return

														}

														continue

													}


													// add block to queue to be verified

													queue.add(lightBlockResponse{

														block: lb,

														peer:  peer,

													})

													r.logger.Debug("backfill: added light block to processing queue", "height", height)


												case <-queue.done():

													return

												}

											}

										}()

									}


									// verify all light blocks

									for {

										select {

										case <-ctx.Done():

											queue.close()

											return nil

										case resp := <-queue.verifyNext():

											// validate the header hash. We take the last block id of the

											// previous header (i.e. one height above) as the trusted hash which

											// we equate to. ValidatorsHash and CommitHash have already been

											// checked in the `ValidateBasic`

											if w, g := trustedBlockID.Hash, resp.block.Hash(); !bytes.Equal(w, g) {

												r.logger.Info("received invalid light block. header hash doesn't match trusted LastBlockID",

													"trustedHash", w, "receivedHash", g, "height", resp.block.Height)

												if err := r.blockCh.SendError(ctx, p2p.PeerError{

													NodeID: resp.peer,

													Err:    fmt.Errorf("received invalid light block. Expected hash %v, got: %v", w, g),

												}); err != nil {

													return nil

												}

												queue.retry(resp.block.Height)

												continue

											}


											// save the signed headers

											if err := r.blockStore.SaveSignedHeader(resp.block.SignedHeader, trustedBlockID); err != nil {

												return err

											}


											// check if there has been a change in the validator set

											if lastValidatorSet != nil && !bytes.Equal(resp.block.Header.ValidatorsHash, resp.block.Header.NextValidatorsHash) {

												// save all the heights that the last validator set was the same

												if err := r.stateStore.SaveValidatorSets(resp.block.Height+1, lastChangeHeight, lastValidatorSet); err != nil {

													return err

												}


												// update the lastChangeHeight

												lastChangeHeight = resp.block.Height

											}


											trustedBlockID = resp.block.LastBlockID

											queue.success()

											r.logger.Info("backfill: verified and stored light block", "height", resp.block.Height)


											lastValidatorSet = resp.block.ValidatorSet


											r.backfilledBlocks++

											r.metrics.BackFilledBlocks.Add(1)


											// The block height might be less than the stopHeight because of the stopTime condition

											// hasn't been fulfilled.

											if resp.block.Height < stopHeight {

												r.backfillBlockTotal++

												r.metrics.BackFillBlocksTotal.Set(float64(r.backfillBlockTotal))

											}


										case <-queue.done():

											if err := queue.error(); err != nil {

												return err

											}


											// save the final batch of validators

											if err := r.stateStore.SaveValidatorSets(queue.terminal.Height, lastChangeHeight, lastValidatorSet); err != nil {

												return err

											}


											r.logger.Info("successfully completed backfill process", "endHeight", queue.terminal.Height)

											return nil

										}

									}

								}


								// handleSnapshotMessage handles envelopes sent from peers on the

								// SnapshotChannel. It returns an error only if the Envelope.Message is unknown

								// for this channel. This should never be called outside of handleMessage.

								func (r *Reactor) handleSnapshotMessage(ctx context.Context, envelope *p2p.Envelope) error {

									logger := r.logger.With("peer", envelope.From)


									switch msg := envelope.Message.(type) {

									case *ssproto.SnapshotsRequest:

										snapshots, err := r.recentSnapshots(ctx, recentSnapshots)

										if err != nil {

											logger.Error("failed to fetch snapshots", "err", err)

											return nil

										}


										for _, snapshot := range snapshots {

											logger.Info(

												"advertising snapshot",

												"height", snapshot.Height,

												"format", snapshot.Format,

												"peer", envelope.From,

											)


											if err := r.snapshotCh.Send(ctx, p2p.Envelope{

												To: envelope.From,

												Message: &ssproto.SnapshotsResponse{

													Height:   snapshot.Height,

													Format:   snapshot.Format,

													Chunks:   snapshot.Chunks,

													Hash:     snapshot.Hash,

													Metadata: snapshot.Metadata,

												},

											}); err != nil {

												return err

											}

										}


									case *ssproto.SnapshotsResponse:

										r.mtx.RLock()

										defer r.mtx.RUnlock()


										if r.syncer == nil {

											logger.Debug("received unexpected snapshot; no state sync in progress")

											return nil

										}


										logger.Info("received snapshot", "height", msg.Height, "format", msg.Format)

										_, err := r.syncer.AddSnapshot(envelope.From, &snapshot{

											Height:   msg.Height,

											Format:   msg.Format,

											Chunks:   msg.Chunks,

											Hash:     msg.Hash,

											Metadata: msg.Metadata,

										})

										if err != nil {

											logger.Error(

												"failed to add snapshot",

												"height", msg.Height,

												"format", msg.Format,

												"err", err,

												"channel", r.snapshotCh.ID,

											)

											return nil

										}

										logger.Info("added snapshot", "height", msg.Height, "format", msg.Format)


									default:

										return fmt.Errorf("received unknown message: %T", msg)

									}


									return nil

								}


								// handleChunkMessage handles envelopes sent from peers on the ChunkChannel.

								// It returns an error only if the Envelope.Message is unknown for this channel.

								// This should never be called outside of handleMessage.

								func (r *Reactor) handleChunkMessage(ctx context.Context, envelope *p2p.Envelope) error {

									switch msg := envelope.Message.(type) {

									case *ssproto.ChunkRequest:

										r.logger.Debug(

											"received chunk request",

											"height", msg.Height,

											"format", msg.Format,

											"chunk", msg.Index,

											"peer", envelope.From,

										)

										resp, err := r.conn.LoadSnapshotChunkSync(ctx, abci.RequestLoadSnapshotChunk{

											Height: msg.Height,

											Format: msg.Format,

											Chunk:  msg.Index,

										})

										if err != nil {

											r.logger.Error(

												"failed to load chunk",

												"height", msg.Height,

												"format", msg.Format,

												"chunk", msg.Index,

												"err", err,

												"peer", envelope.From,

											)

											return nil

										}


										r.logger.Debug(

											"sending chunk",

											"height", msg.Height,

											"format", msg.Format,

											"chunk", msg.Index,

											"peer", envelope.From,

										)

										if err := r.chunkCh.Send(ctx, p2p.Envelope{

											To: envelope.From,

											Message: &ssproto.ChunkResponse{

												Height:  msg.Height,

												Format:  msg.Format,

												Index:   msg.Index,

												Chunk:   resp.Chunk,

												Missing: resp.Chunk == nil,

											},

										}); err != nil {

											return err

										}


									case *ssproto.ChunkResponse:

										r.mtx.RLock()

										defer r.mtx.RUnlock()


										if r.syncer == nil {

											r.logger.Debug("received unexpected chunk; no state sync in progress", "peer", envelope.From)

											return nil

										}


										r.logger.Debug(

											"received chunk; adding to sync",

											"height", msg.Height,

											"format", msg.Format,

											"chunk", msg.Index,

											"peer", envelope.From,

										)

										_, err := r.syncer.AddChunk(&chunk{

											Height: msg.Height,

											Format: msg.Format,

											Index:  msg.Index,

											Chunk:  msg.Chunk,

											Sender: envelope.From,

										})

										if err != nil {

											r.logger.Error(

												"failed to add chunk",

												"height", msg.Height,

												"format", msg.Format,

												"chunk", msg.Index,

												"err", err,

												"peer", envelope.From,

											)

											return nil

										}


									default:

										return fmt.Errorf("received unknown message: %T", msg)

									}


									return nil

								}


								func (r *Reactor) handleLightBlockMessage(ctx context.Context, envelope *p2p.Envelope) error {

									switch msg := envelope.Message.(type) {

									case *ssproto.LightBlockRequest:

										r.logger.Info("received light block request", "height", msg.Height)

										lb, err := r.fetchLightBlock(msg.Height)

										if err != nil {

											r.logger.Error("failed to retrieve light block", "err", err, "height", msg.Height)

											return err

										}

										if lb == nil {

											if err := r.blockCh.Send(ctx, p2p.Envelope{

												To: envelope.From,

												Message: &ssproto.LightBlockResponse{

													LightBlock: nil,

												},

											}); err != nil {

												return err

											}

											return nil

										}


										lbproto, err := lb.ToProto()

										if err != nil {

											r.logger.Error("marshaling light block to proto", "err", err)

											return nil

										}


										// NOTE: If we don't have the light block we will send a nil light block

										// back to the requested node, indicating that we don't have it.

										if err := r.blockCh.Send(ctx, p2p.Envelope{

											To: envelope.From,

											Message: &ssproto.LightBlockResponse{

												LightBlock: lbproto,

											},

										}); err != nil {

											return err

										}

									case *ssproto.LightBlockResponse:

										var height int64

										if msg.LightBlock != nil {

											height = msg.LightBlock.SignedHeader.Header.Height

										}

										r.logger.Info("received light block response", "peer", envelope.From, "height", height)

										if err := r.dispatcher.Respond(msg.LightBlock, envelope.From); err != nil {

											r.logger.Error("error processing light block response", "err", err, "height", height)

										}


									default:

										return fmt.Errorf("received unknown message: %T", msg)

									}


									return nil

								}


								func (r *Reactor) handleParamsMessage(ctx context.Context, envelope *p2p.Envelope) error {

									switch msg := envelope.Message.(type) {

									case *ssproto.ParamsRequest:

										r.logger.Debug("received consensus params request", "height", msg.Height)

										cp, err := r.stateStore.LoadConsensusParams(int64(msg.Height))

										if err != nil {

											r.logger.Error("failed to fetch requested consensus params", "err", err, "height", msg.Height)

											return nil

										}


										cpproto := cp.ToProto()

										if err := r.paramsCh.Send(ctx, p2p.Envelope{

											To: envelope.From,

											Message: &ssproto.ParamsResponse{

												Height:          msg.Height,

												ConsensusParams: cpproto,

											},

										}); err != nil {

											return err

										}

									case *ssproto.ParamsResponse:

										r.mtx.RLock()

										defer r.mtx.RUnlock()

										r.logger.Debug("received consensus params response", "height", msg.Height)


										cp := types.ConsensusParamsFromProto(msg.ConsensusParams)


										if sp, ok := r.stateProvider.(*stateProviderP2P); ok {

											select {

											case sp.paramsRecvCh <- cp:

											case <-time.After(time.Second):

												return errors.New("failed to send consensus params, stateprovider not ready for response")

											}

										} else {

											r.logger.Debug("received unexpected params response; using RPC state provider", "peer", envelope.From)

										}


									default:

										return fmt.Errorf("received unknown message: %T", msg)

									}


									return nil

								}


								// handleMessage handles an Envelope sent from a peer on a specific p2p Channel.

								// It will handle errors and any possible panics gracefully. A caller can handle

								// any error returned by sending a PeerError on the respective channel.

								func (r *Reactor) handleMessage(ctx context.Context, chID p2p.ChannelID, envelope *p2p.Envelope) (err error) {

									defer func() {

										if e := recover(); e != nil {

											err = fmt.Errorf("panic in processing message: %v", e)

											r.logger.Error(

												"recovering from processing message panic",

												"err", err,

												"stack", string(debug.Stack()),

											)

										}

									}()


									r.logger.Debug("received message", "message", reflect.TypeOf(envelope.Message), "peer", envelope.From)


									switch chID {

									case SnapshotChannel:

										err = r.handleSnapshotMessage(ctx, envelope)

									case ChunkChannel:

										err = r.handleChunkMessage(ctx, envelope)

									case LightBlockChannel:

										err = r.handleLightBlockMessage(ctx, envelope)

									case ParamsChannel:

										err = r.handleParamsMessage(ctx, envelope)

									default:

										err = fmt.Errorf("unknown channel ID (%d) for envelope (%v)", chID, envelope)

									}


									return err

								}


								// processCh routes state sync messages to their respective handlers. Any error

								// encountered during message execution will result in a PeerError being sent on

								// the respective channel. When the reactor is stopped, we will catch the signal

								// and close the p2p Channel gracefully.

								func (r *Reactor) processCh(ctx context.Context, ch *p2p.Channel, chName string) {

									iter := ch.Receive(ctx)

									for iter.Next(ctx) {

										envelope := iter.Envelope()

										if err := r.handleMessage(ctx, ch.ID, envelope); err != nil {

											r.logger.Error("failed to process message",

												"err", err,

												"channel", chName,

												"ch_id", ch.ID,

												"envelope", envelope)

											if serr := ch.SendError(ctx, p2p.PeerError{

												NodeID: envelope.From,

												Err:    err,

											}); serr != nil {

												return

											}

										}

									}

								}


								// processPeerUpdate processes a PeerUpdate, returning an error upon failing to

								// handle the PeerUpdate or if a panic is recovered.

								func (r *Reactor) processPeerUpdate(ctx context.Context, peerUpdate p2p.PeerUpdate) {

									r.logger.Info("received peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)


									switch peerUpdate.Status {

									case p2p.PeerStatusUp:

										r.peers.Append(peerUpdate.NodeID)

									case p2p.PeerStatusDown:

										r.peers.Remove(peerUpdate.NodeID)

									}


									r.mtx.Lock()

									defer r.mtx.Unlock()

									if r.syncer == nil {

										return

									}


									switch peerUpdate.Status {

									case p2p.PeerStatusUp:

										newProvider := NewBlockProvider(peerUpdate.NodeID, r.chainID, r.dispatcher)

										r.providers[peerUpdate.NodeID] = newProvider

										err := r.syncer.AddPeer(ctx, peerUpdate.NodeID)

										if err != nil {

											r.logger.Error("error adding peer to syncer", "error", err)

											return

										}

										if sp, ok := r.stateProvider.(*stateProviderP2P); ok {

											// we do this in a separate routine to not block whilst waiting for the light client to finish

											// whatever call it's currently executing

											go sp.addProvider(newProvider)

										}


									case p2p.PeerStatusDown:

										delete(r.providers, peerUpdate.NodeID)

										r.syncer.RemovePeer(peerUpdate.NodeID)

									}

									r.logger.Info("processed peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)

								}


								// processPeerUpdates initiates a blocking process where we listen for and handle

								// PeerUpdate messages. When the reactor is stopped, we will catch the signal and

								// close the p2p PeerUpdatesCh gracefully.

								func (r *Reactor) processPeerUpdates(ctx context.Context) {

									for {

										select {

										case <-ctx.Done():

											r.logger.Debug("stopped listening on peer updates channel; closing...")

											return

										case peerUpdate := <-r.peerUpdates.Updates():

											r.processPeerUpdate(ctx, peerUpdate)

										}

									}

								}


								// recentSnapshots fetches the n most recent snapshots from the app

								func (r *Reactor) recentSnapshots(ctx context.Context, n uint32) ([]*snapshot, error) {

									resp, err := r.conn.ListSnapshotsSync(ctx, abci.RequestListSnapshots{})

									if err != nil {

										return nil, err

									}


									sort.Slice(resp.Snapshots, func(i, j int) bool {

										a := resp.Snapshots[i]

										b := resp.Snapshots[j]


										switch {

										case a.Height > b.Height:

											return true

										case a.Height == b.Height && a.Format > b.Format:

											return true

										default:

											return false

										}

									})


									snapshots := make([]*snapshot, 0, n)

									for i, s := range resp.Snapshots {

										if i >= recentSnapshots {

											break

										}


										snapshots = append(snapshots, &snapshot{

											Height:   s.Height,

											Format:   s.Format,

											Chunks:   s.Chunks,

											Hash:     s.Hash,

											Metadata: s.Metadata,

										})

									}


									return snapshots, nil

								}


								// fetchLightBlock works out whether the node has a light block at a particular

								// height and if so returns it so it can be gossiped to peers

								func (r *Reactor) fetchLightBlock(height uint64) (*types.LightBlock, error) {

									h := int64(height)


									blockMeta := r.blockStore.LoadBlockMeta(h)

									if blockMeta == nil {

										return nil, nil

									}


									commit := r.blockStore.LoadBlockCommit(h)

									if commit == nil {

										return nil, nil

									}


									vals, err := r.stateStore.LoadValidators(h)

									if err != nil {

										return nil, err

									}

									if vals == nil {

										return nil, nil

									}


									return &types.LightBlock{

										SignedHeader: &types.SignedHeader{

											Header: &blockMeta.Header,

											Commit: commit,

										},

										ValidatorSet: vals,

									}, nil

								}


								func (r *Reactor) waitForEnoughPeers(ctx context.Context, numPeers int) error {

									startAt := time.Now()

									t := time.NewTicker(100 * time.Millisecond)

									defer t.Stop()

									logT := time.NewTicker(time.Minute)

									defer logT.Stop()

									var iter int

									for r.peers.Len() < numPeers {

										iter++

										select {

										case <-ctx.Done():

											return fmt.Errorf("operation canceled while waiting for peers after %.2fs [%d/%d]",

												time.Since(startAt).Seconds(), r.peers.Len(), numPeers)

										case <-t.C:

											continue

										case <-logT.C:

											r.logger.Info("waiting for sufficient peers to start statesync",

												"duration", time.Since(startAt).String(),

												"target", numPeers,

												"peers", r.peers.Len(),

												"iters", iter,

											)

											continue

										}

									}

									return nil

								}


								func (r *Reactor) initStateProvider(ctx context.Context, chainID string, initialHeight int64) error {

									var err error

									to := light.TrustOptions{

										Period: r.cfg.TrustPeriod,

										Height: r.cfg.TrustHeight,

										Hash:   r.cfg.TrustHashBytes(),

									}

									spLogger := r.logger.With("module", "stateprovider")

									spLogger.Info("initializing state provider", "trustPeriod", to.Period,

										"trustHeight", to.Height, "useP2P", r.cfg.UseP2P)


									if r.cfg.UseP2P {

										if err := r.waitForEnoughPeers(ctx, 2); err != nil {

											return err

										}


										peers := r.peers.All()

										providers := make([]provider.Provider, len(peers))

										for idx, p := range peers {

											providers[idx] = NewBlockProvider(p, chainID, r.dispatcher)

										}


										r.stateProvider, err = NewP2PStateProvider(ctx, chainID, initialHeight, providers, to, r.paramsCh, spLogger)

										if err != nil {

											return fmt.Errorf("failed to initialize P2P state provider: %w", err)

										}

									} else {

										r.stateProvider, err = NewRPCStateProvider(ctx, chainID, initialHeight, r.cfg.RPCServers, to, spLogger)

										if err != nil {

											return fmt.Errorf("failed to initialize RPC state provider: %w", err)

										}

									}

									return nil

								}


								func (r *Reactor) TotalSnapshots() int64 {

									r.mtx.RLock()

									defer r.mtx.RUnlock()


									if r.syncer != nil && r.syncer.snapshots != nil {

										return int64(len(r.syncer.snapshots.snapshots))

									}

									return 0

								}


								func (r *Reactor) ChunkProcessAvgTime() time.Duration {

									r.mtx.RLock()

									defer r.mtx.RUnlock()


									if r.syncer != nil {

										return time.Duration(r.syncer.avgChunkTime)

									}

									return time.Duration(0)

								}


								func (r *Reactor) SnapshotHeight() int64 {

									r.mtx.RLock()

									defer r.mtx.RUnlock()


									if r.syncer != nil {

										return r.syncer.lastSyncedSnapshotHeight

									}

									return 0

								}

								func (r *Reactor) SnapshotChunksCount() int64 {

									r.mtx.RLock()

									defer r.mtx.RUnlock()


									if r.syncer != nil && r.syncer.chunks != nil {

										return int64(r.syncer.chunks.numChunksReturned())

									}

									return 0

								}


								func (r *Reactor) SnapshotChunksTotal() int64 {

									r.mtx.RLock()

									defer r.mtx.RUnlock()


									if r.syncer != nil && r.syncer.processingSnapshot != nil {

										return int64(r.syncer.processingSnapshot.Chunks)

									}

									return 0

								}


								func (r *Reactor) BackFilledBlocks() int64 {

									r.mtx.RLock()

									defer r.mtx.RUnlock()


									return r.backfilledBlocks

								}


								func (r *Reactor) BackFillBlocksTotal() int64 {

									r.mtx.RLock()

									defer r.mtx.RUnlock()


									return r.backfillBlockTotal

								}