package consensus
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"runtime/debug"
|
|
"sync"
|
|
"time"
|
|
|
|
cstypes "github.com/tendermint/tendermint/internal/consensus/types"
|
|
"github.com/tendermint/tendermint/internal/eventbus"
|
|
"github.com/tendermint/tendermint/internal/p2p"
|
|
sm "github.com/tendermint/tendermint/internal/state"
|
|
"github.com/tendermint/tendermint/libs/bits"
|
|
tmevents "github.com/tendermint/tendermint/libs/events"
|
|
"github.com/tendermint/tendermint/libs/log"
|
|
"github.com/tendermint/tendermint/libs/service"
|
|
tmtime "github.com/tendermint/tendermint/libs/time"
|
|
tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus"
|
|
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
|
|
"github.com/tendermint/tendermint/types"
|
|
)
|
|
|
|
var (
|
|
_ service.Service = (*Reactor)(nil)
|
|
_ p2p.Wrapper = (*tmcons.Message)(nil)
|
|
)
|
|
|
|
// GetChannelDescriptor produces an instance of a descriptor for this
|
|
// package's required channels.
|
|
func getChannelDescriptors() map[p2p.ChannelID]*p2p.ChannelDescriptor {
|
|
return map[p2p.ChannelID]*p2p.ChannelDescriptor{
|
|
StateChannel: {
|
|
ID: StateChannel,
|
|
MessageType: new(tmcons.Message),
|
|
Priority: 8,
|
|
SendQueueCapacity: 64,
|
|
RecvMessageCapacity: maxMsgSize,
|
|
RecvBufferCapacity: 128,
|
|
},
|
|
DataChannel: {
|
|
// TODO: Consider a split between gossiping current block and catchup
|
|
// stuff. Once we gossip the whole block there is nothing left to send
|
|
// until next height or round.
|
|
ID: DataChannel,
|
|
MessageType: new(tmcons.Message),
|
|
Priority: 12,
|
|
SendQueueCapacity: 64,
|
|
RecvBufferCapacity: 512,
|
|
RecvMessageCapacity: maxMsgSize,
|
|
},
|
|
VoteChannel: {
|
|
ID: VoteChannel,
|
|
MessageType: new(tmcons.Message),
|
|
Priority: 10,
|
|
SendQueueCapacity: 64,
|
|
RecvBufferCapacity: 128,
|
|
RecvMessageCapacity: maxMsgSize,
|
|
},
|
|
VoteSetBitsChannel: {
|
|
ID: VoteSetBitsChannel,
|
|
MessageType: new(tmcons.Message),
|
|
Priority: 5,
|
|
SendQueueCapacity: 8,
|
|
RecvBufferCapacity: 128,
|
|
RecvMessageCapacity: maxMsgSize,
|
|
},
|
|
}
|
|
}
|
|
|
|
const (
|
|
StateChannel = p2p.ChannelID(0x20)
|
|
DataChannel = p2p.ChannelID(0x21)
|
|
VoteChannel = p2p.ChannelID(0x22)
|
|
VoteSetBitsChannel = p2p.ChannelID(0x23)
|
|
|
|
maxMsgSize = 1048576 // 1MB; NOTE: keep in sync with types.PartSet sizes.
|
|
|
|
blocksToContributeToBecomeGoodPeer = 10000
|
|
votesToContributeToBecomeGoodPeer = 10000
|
|
|
|
listenerIDConsensus = "consensus-reactor"
|
|
)
|
|
|
|
// NOTE: Temporary interface for switching to block sync, we should get rid of v0.
|
|
// See: https://github.com/tendermint/tendermint/issues/4595
|
|
type BlockSyncReactor interface {
|
|
SwitchToBlockSync(context.Context, sm.State) error
|
|
|
|
GetMaxPeerBlockHeight() int64
|
|
|
|
// GetTotalSyncedTime returns the time duration since the blocksync starting.
|
|
GetTotalSyncedTime() time.Duration
|
|
|
|
// GetRemainingSyncTime returns the estimating time the node will be fully synced,
|
|
// if will return 0 if the blocksync does not perform or the number of block synced is
|
|
// too small (less than 100).
|
|
GetRemainingSyncTime() time.Duration
|
|
}
|
|
|
|
//go:generate ../../scripts/mockery_generate.sh ConsSyncReactor
|
|
// ConsSyncReactor defines an interface used for testing abilities of node.startStateSync.
|
|
type ConsSyncReactor interface {
|
|
SwitchToConsensus(sm.State, bool)
|
|
SetStateSyncingMetrics(float64)
|
|
SetBlockSyncingMetrics(float64)
|
|
}
|
|
|
|
// Reactor defines a reactor for the consensus service.
|
|
type Reactor struct {
|
|
service.BaseService
|
|
logger log.Logger
|
|
|
|
state *State
|
|
eventBus *eventbus.EventBus
|
|
Metrics *Metrics
|
|
|
|
mtx sync.RWMutex
|
|
peers map[types.NodeID]*PeerState
|
|
waitSync bool
|
|
|
|
stateCh *p2p.Channel
|
|
dataCh *p2p.Channel
|
|
voteCh *p2p.Channel
|
|
voteSetBitsCh *p2p.Channel
|
|
peerUpdates *p2p.PeerUpdates
|
|
}
|
|
|
|
// NewReactor returns a reference to a new consensus reactor, which implements
|
|
// the service.Service interface. It accepts a logger, consensus state, references
|
|
// to relevant p2p Channels and a channel to listen for peer updates on. The
|
|
// reactor will close all p2p Channels when stopping.
|
|
func NewReactor(
|
|
ctx context.Context,
|
|
logger log.Logger,
|
|
cs *State,
|
|
channelCreator p2p.ChannelCreator,
|
|
peerUpdates *p2p.PeerUpdates,
|
|
waitSync bool,
|
|
metrics *Metrics,
|
|
) (*Reactor, error) {
|
|
chans := getChannelDescriptors()
|
|
stateCh, err := channelCreator(ctx, chans[StateChannel])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
dataCh, err := channelCreator(ctx, chans[DataChannel])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
voteCh, err := channelCreator(ctx, chans[VoteChannel])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
voteSetBitsCh, err := channelCreator(ctx, chans[VoteSetBitsChannel])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
r := &Reactor{
|
|
logger: logger,
|
|
state: cs,
|
|
waitSync: waitSync,
|
|
peers: make(map[types.NodeID]*PeerState),
|
|
Metrics: metrics,
|
|
stateCh: stateCh,
|
|
dataCh: dataCh,
|
|
voteCh: voteCh,
|
|
voteSetBitsCh: voteSetBitsCh,
|
|
peerUpdates: peerUpdates,
|
|
}
|
|
r.BaseService = *service.NewBaseService(logger, "Consensus", r)
|
|
|
|
return r, nil
|
|
}
|
|
|
|
// OnStart starts separate go routines for each p2p Channel and listens for
|
|
// envelopes on each. In addition, it also listens for peer updates and handles
|
|
// messages on that p2p channel accordingly. The caller must be sure to execute
|
|
// OnStop to ensure the outbound p2p Channels are closed.
|
|
func (r *Reactor) OnStart(ctx context.Context) error {
|
|
r.logger.Debug("consensus wait sync", "wait_sync", r.WaitSync())
|
|
|
|
// start routine that computes peer statistics for evaluating peer quality
|
|
//
|
|
// TODO: Evaluate if we need this to be synchronized via WaitGroup as to not
|
|
// leak the goroutine when stopping the reactor.
|
|
go r.peerStatsRoutine(ctx)
|
|
|
|
r.subscribeToBroadcastEvents()
|
|
|
|
if !r.WaitSync() {
|
|
if err := r.state.Start(ctx); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
go r.processStateCh(ctx)
|
|
go r.processDataCh(ctx)
|
|
go r.processVoteCh(ctx)
|
|
go r.processVoteSetBitsCh(ctx)
|
|
go r.processPeerUpdates(ctx)
|
|
|
|
return nil
|
|
}
|
|
|
|
// OnStop stops the reactor by signaling to all spawned goroutines to exit and
|
|
// blocking until they all exit, as well as unsubscribing from events and stopping
|
|
// state.
|
|
func (r *Reactor) OnStop() {
|
|
r.unsubscribeFromBroadcastEvents()
|
|
|
|
if err := r.state.Stop(); err != nil {
|
|
if !errors.Is(err, service.ErrAlreadyStopped) {
|
|
r.logger.Error("failed to stop consensus state", "err", err)
|
|
}
|
|
}
|
|
|
|
if !r.WaitSync() {
|
|
r.state.Wait()
|
|
}
|
|
|
|
r.mtx.Lock()
|
|
// Close and wait for each of the peers to shutdown.
|
|
// This is safe to perform with the lock since none of the peers require the
|
|
// lock to complete any of the methods that the waitgroup is waiting on.
|
|
for _, state := range r.peers {
|
|
state.closer.Close()
|
|
state.broadcastWG.Wait()
|
|
}
|
|
r.mtx.Unlock()
|
|
}
|
|
|
|
// SetEventBus sets the reactor's event bus.
|
|
func (r *Reactor) SetEventBus(b *eventbus.EventBus) {
|
|
r.eventBus = b
|
|
r.state.SetEventBus(b)
|
|
}
|
|
|
|
// WaitSync returns whether the consensus reactor is waiting for state/block sync.
|
|
func (r *Reactor) WaitSync() bool {
|
|
r.mtx.RLock()
|
|
defer r.mtx.RUnlock()
|
|
|
|
return r.waitSync
|
|
}
|
|
|
|
// SwitchToConsensus switches from block-sync mode to consensus mode. It resets
|
|
// the state, turns off block-sync, and starts the consensus state-machine.
|
|
func (r *Reactor) SwitchToConsensus(ctx context.Context, state sm.State, skipWAL bool) {
|
|
r.logger.Info("switching to consensus")
|
|
|
|
// we have no votes, so reconstruct LastCommit from SeenCommit
|
|
if state.LastBlockHeight > 0 {
|
|
r.state.reconstructLastCommit(state)
|
|
}
|
|
|
|
// NOTE: The line below causes broadcastNewRoundStepRoutine() to broadcast a
|
|
// NewRoundStepMessage.
|
|
r.state.updateToState(ctx, state)
|
|
|
|
r.mtx.Lock()
|
|
r.waitSync = false
|
|
r.mtx.Unlock()
|
|
|
|
r.Metrics.BlockSyncing.Set(0)
|
|
r.Metrics.StateSyncing.Set(0)
|
|
|
|
if skipWAL {
|
|
r.state.doWALCatchup = false
|
|
}
|
|
|
|
if err := r.state.Start(ctx); err != nil {
|
|
panic(fmt.Sprintf(`failed to start consensus state: %v
|
|
|
|
conS:
|
|
%+v
|
|
|
|
conR:
|
|
%+v`, err, r.state, r))
|
|
}
|
|
|
|
d := types.EventDataBlockSyncStatus{Complete: true, Height: state.LastBlockHeight}
|
|
if err := r.eventBus.PublishEventBlockSyncStatus(ctx, d); err != nil {
|
|
r.logger.Error("failed to emit the blocksync complete event", "err", err)
|
|
}
|
|
}
|
|
|
|
// String returns a string representation of the Reactor.
|
|
//
|
|
// NOTE: For now, it is just a hard-coded string to avoid accessing unprotected
|
|
// shared variables.
|
|
//
|
|
// TODO: improve!
|
|
func (r *Reactor) String() string {
|
|
return "ConsensusReactor"
|
|
}
|
|
|
|
// StringIndented returns an indented string representation of the Reactor.
|
|
func (r *Reactor) StringIndented(indent string) string {
|
|
r.mtx.RLock()
|
|
defer r.mtx.RUnlock()
|
|
|
|
s := "ConsensusReactor{\n"
|
|
s += indent + " " + r.state.StringIndented(indent+" ") + "\n"
|
|
|
|
for _, ps := range r.peers {
|
|
s += indent + " " + ps.StringIndented(indent+" ") + "\n"
|
|
}
|
|
|
|
s += indent + "}"
|
|
return s
|
|
}
|
|
|
|
// GetPeerState returns PeerState for a given NodeID.
|
|
func (r *Reactor) GetPeerState(peerID types.NodeID) (*PeerState, bool) {
|
|
r.mtx.RLock()
|
|
defer r.mtx.RUnlock()
|
|
|
|
ps, ok := r.peers[peerID]
|
|
return ps, ok
|
|
}
|
|
|
|
func (r *Reactor) broadcastNewRoundStepMessage(ctx context.Context, rs *cstypes.RoundState) error {
|
|
return r.stateCh.Send(ctx, p2p.Envelope{
|
|
Broadcast: true,
|
|
Message: makeRoundStepMessage(rs),
|
|
})
|
|
}
|
|
|
|
func (r *Reactor) broadcastNewValidBlockMessage(ctx context.Context, rs *cstypes.RoundState) error {
|
|
psHeader := rs.ProposalBlockParts.Header()
|
|
return r.stateCh.Send(ctx, p2p.Envelope{
|
|
Broadcast: true,
|
|
Message: &tmcons.NewValidBlock{
|
|
Height: rs.Height,
|
|
Round: rs.Round,
|
|
BlockPartSetHeader: psHeader.ToProto(),
|
|
BlockParts: rs.ProposalBlockParts.BitArray().ToProto(),
|
|
IsCommit: rs.Step == cstypes.RoundStepCommit,
|
|
},
|
|
})
|
|
}
|
|
|
|
func (r *Reactor) broadcastHasVoteMessage(ctx context.Context, vote *types.Vote) error {
|
|
return r.stateCh.Send(ctx, p2p.Envelope{
|
|
Broadcast: true,
|
|
Message: &tmcons.HasVote{
|
|
Height: vote.Height,
|
|
Round: vote.Round,
|
|
Type: vote.Type,
|
|
Index: vote.ValidatorIndex,
|
|
},
|
|
})
|
|
}
|
|
|
|
// subscribeToBroadcastEvents subscribes for new round steps and votes using the
|
|
// internal pubsub defined in the consensus state to broadcast them to peers
|
|
// upon receiving.
|
|
func (r *Reactor) subscribeToBroadcastEvents() {
|
|
err := r.state.evsw.AddListenerForEvent(
|
|
listenerIDConsensus,
|
|
types.EventNewRoundStepValue,
|
|
func(ctx context.Context, data tmevents.EventData) error {
|
|
if err := r.broadcastNewRoundStepMessage(ctx, data.(*cstypes.RoundState)); err != nil {
|
|
return err
|
|
}
|
|
select {
|
|
case r.state.onStopCh <- data.(*cstypes.RoundState):
|
|
return nil
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
return nil
|
|
}
|
|
},
|
|
)
|
|
if err != nil {
|
|
r.logger.Error("failed to add listener for events", "err", err)
|
|
}
|
|
|
|
err = r.state.evsw.AddListenerForEvent(
|
|
listenerIDConsensus,
|
|
types.EventValidBlockValue,
|
|
func(ctx context.Context, data tmevents.EventData) error {
|
|
return r.broadcastNewValidBlockMessage(ctx, data.(*cstypes.RoundState))
|
|
},
|
|
)
|
|
if err != nil {
|
|
r.logger.Error("failed to add listener for events", "err", err)
|
|
}
|
|
|
|
err = r.state.evsw.AddListenerForEvent(
|
|
listenerIDConsensus,
|
|
types.EventVoteValue,
|
|
func(ctx context.Context, data tmevents.EventData) error {
|
|
return r.broadcastHasVoteMessage(ctx, data.(*types.Vote))
|
|
},
|
|
)
|
|
if err != nil {
|
|
r.logger.Error("failed to add listener for events", "err", err)
|
|
}
|
|
}
|
|
|
|
func (r *Reactor) unsubscribeFromBroadcastEvents() {
|
|
r.state.evsw.RemoveListener(listenerIDConsensus)
|
|
}
|
|
|
|
func makeRoundStepMessage(rs *cstypes.RoundState) *tmcons.NewRoundStep {
|
|
return &tmcons.NewRoundStep{
|
|
Height: rs.Height,
|
|
Round: rs.Round,
|
|
Step: uint32(rs.Step),
|
|
SecondsSinceStartTime: int64(time.Since(rs.StartTime).Seconds()),
|
|
LastCommitRound: rs.LastCommit.GetRound(),
|
|
}
|
|
}
|
|
|
|
func (r *Reactor) sendNewRoundStepMessage(ctx context.Context, peerID types.NodeID) error {
|
|
return r.stateCh.Send(ctx, p2p.Envelope{
|
|
To: peerID,
|
|
Message: makeRoundStepMessage(r.state.GetRoundState()),
|
|
})
|
|
}
|
|
|
|
func (r *Reactor) gossipDataForCatchup(ctx context.Context, rs *cstypes.RoundState, prs *cstypes.PeerRoundState, ps *PeerState) {
|
|
logger := r.logger.With("height", prs.Height).With("peer", ps.peerID)
|
|
|
|
if index, ok := prs.ProposalBlockParts.Not().PickRandom(); ok {
|
|
// ensure that the peer's PartSetHeader is correct
|
|
blockMeta := r.state.blockStore.LoadBlockMeta(prs.Height)
|
|
if blockMeta == nil {
|
|
logger.Error(
|
|
"failed to load block meta",
|
|
"our_height", rs.Height,
|
|
"blockstore_base", r.state.blockStore.Base(),
|
|
"blockstore_height", r.state.blockStore.Height(),
|
|
)
|
|
|
|
time.Sleep(r.state.config.PeerGossipSleepDuration)
|
|
return
|
|
} else if !blockMeta.BlockID.PartSetHeader.Equals(prs.ProposalBlockPartSetHeader) {
|
|
logger.Info(
|
|
"peer ProposalBlockPartSetHeader mismatch; sleeping",
|
|
"block_part_set_header", blockMeta.BlockID.PartSetHeader,
|
|
"peer_block_part_set_header", prs.ProposalBlockPartSetHeader,
|
|
)
|
|
|
|
time.Sleep(r.state.config.PeerGossipSleepDuration)
|
|
return
|
|
}
|
|
|
|
part := r.state.blockStore.LoadBlockPart(prs.Height, index)
|
|
if part == nil {
|
|
logger.Error(
|
|
"failed to load block part",
|
|
"index", index,
|
|
"block_part_set_header", blockMeta.BlockID.PartSetHeader,
|
|
"peer_block_part_set_header", prs.ProposalBlockPartSetHeader,
|
|
)
|
|
|
|
time.Sleep(r.state.config.PeerGossipSleepDuration)
|
|
return
|
|
}
|
|
|
|
partProto, err := part.ToProto()
|
|
if err != nil {
|
|
logger.Error("failed to convert block part to proto", "err", err)
|
|
|
|
time.Sleep(r.state.config.PeerGossipSleepDuration)
|
|
return
|
|
}
|
|
|
|
logger.Debug("sending block part for catchup", "round", prs.Round, "index", index)
|
|
_ = r.dataCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.BlockPart{
|
|
Height: prs.Height, // not our height, so it does not matter.
|
|
Round: prs.Round, // not our height, so it does not matter
|
|
Part: *partProto,
|
|
},
|
|
})
|
|
|
|
return
|
|
}
|
|
|
|
time.Sleep(r.state.config.PeerGossipSleepDuration)
|
|
}
|
|
|
|
func (r *Reactor) gossipDataRoutine(ctx context.Context, ps *PeerState) {
|
|
logger := r.logger.With("peer", ps.peerID)
|
|
|
|
defer ps.broadcastWG.Done()
|
|
|
|
timer := time.NewTimer(0)
|
|
defer timer.Stop()
|
|
|
|
OUTER_LOOP:
|
|
for {
|
|
if !r.IsRunning() {
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ps.closer.Done():
|
|
// The peer is marked for removal via a PeerUpdate as the doneCh was
|
|
// explicitly closed to signal we should exit.
|
|
return
|
|
|
|
default:
|
|
}
|
|
|
|
rs := r.state.GetRoundState()
|
|
prs := ps.GetRoundState()
|
|
|
|
// Send proposal Block parts?
|
|
if rs.ProposalBlockParts.HasHeader(prs.ProposalBlockPartSetHeader) {
|
|
if index, ok := rs.ProposalBlockParts.BitArray().Sub(prs.ProposalBlockParts.Copy()).PickRandom(); ok {
|
|
part := rs.ProposalBlockParts.GetPart(index)
|
|
partProto, err := part.ToProto()
|
|
if err != nil {
|
|
logger.Error("failed to convert block part to proto", "err", err)
|
|
return
|
|
}
|
|
|
|
logger.Debug("sending block part", "height", prs.Height, "round", prs.Round)
|
|
if err := r.dataCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.BlockPart{
|
|
Height: rs.Height, // this tells peer that this part applies to us
|
|
Round: rs.Round, // this tells peer that this part applies to us
|
|
Part: *partProto,
|
|
},
|
|
}); err != nil {
|
|
return
|
|
}
|
|
|
|
ps.SetHasProposalBlockPart(prs.Height, prs.Round, index)
|
|
continue OUTER_LOOP
|
|
}
|
|
}
|
|
|
|
// if the peer is on a previous height that we have, help catch up
|
|
blockStoreBase := r.state.blockStore.Base()
|
|
if blockStoreBase > 0 && 0 < prs.Height && prs.Height < rs.Height && prs.Height >= blockStoreBase {
|
|
heightLogger := logger.With("height", prs.Height)
|
|
|
|
// If we never received the commit message from the peer, the block parts
|
|
// will not be initialized.
|
|
if prs.ProposalBlockParts == nil {
|
|
blockMeta := r.state.blockStore.LoadBlockMeta(prs.Height)
|
|
if blockMeta == nil {
|
|
heightLogger.Error(
|
|
"failed to load block meta",
|
|
"blockstoreBase", blockStoreBase,
|
|
"blockstoreHeight", r.state.blockStore.Height(),
|
|
)
|
|
|
|
timer.Reset(r.state.config.PeerGossipSleepDuration)
|
|
select {
|
|
case <-timer.C:
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
} else {
|
|
ps.InitProposalBlockParts(blockMeta.BlockID.PartSetHeader)
|
|
}
|
|
|
|
// Continue the loop since prs is a copy and not effected by this
|
|
// initialization.
|
|
continue OUTER_LOOP
|
|
}
|
|
|
|
r.gossipDataForCatchup(ctx, rs, prs, ps)
|
|
continue OUTER_LOOP
|
|
}
|
|
|
|
// if height and round don't match, sleep
|
|
if (rs.Height != prs.Height) || (rs.Round != prs.Round) {
|
|
timer.Reset(r.state.config.PeerGossipSleepDuration)
|
|
select {
|
|
case <-timer.C:
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
continue OUTER_LOOP
|
|
}
|
|
|
|
// By here, height and round match.
|
|
// Proposal block parts were already matched and sent if any were wanted.
|
|
// (These can match on hash so the round doesn't matter)
|
|
// Now consider sending other things, like the Proposal itself.
|
|
|
|
// Send Proposal && ProposalPOL BitArray?
|
|
if rs.Proposal != nil && !prs.Proposal {
|
|
// Proposal: share the proposal metadata with peer.
|
|
{
|
|
propProto := rs.Proposal.ToProto()
|
|
|
|
logger.Debug("sending proposal", "height", prs.Height, "round", prs.Round)
|
|
if err := r.dataCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.Proposal{
|
|
Proposal: *propProto,
|
|
},
|
|
}); err != nil {
|
|
return
|
|
}
|
|
|
|
// NOTE: A peer might have received a different proposal message, so
|
|
// this Proposal msg will be rejected!
|
|
ps.SetHasProposal(rs.Proposal)
|
|
}
|
|
|
|
// ProposalPOL: lets peer know which POL votes we have so far. The peer
|
|
// must receive ProposalMessage first. Note, rs.Proposal was validated,
|
|
// so rs.Proposal.POLRound <= rs.Round, so we definitely have
|
|
// rs.Votes.Prevotes(rs.Proposal.POLRound).
|
|
if 0 <= rs.Proposal.POLRound {
|
|
pPol := rs.Votes.Prevotes(rs.Proposal.POLRound).BitArray()
|
|
pPolProto := pPol.ToProto()
|
|
|
|
logger.Debug("sending POL", "height", prs.Height, "round", prs.Round)
|
|
if err := r.dataCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.ProposalPOL{
|
|
Height: rs.Height,
|
|
ProposalPolRound: rs.Proposal.POLRound,
|
|
ProposalPol: *pPolProto,
|
|
},
|
|
}); err != nil {
|
|
return
|
|
}
|
|
}
|
|
|
|
continue OUTER_LOOP
|
|
}
|
|
|
|
// nothing to do -- sleep
|
|
timer.Reset(r.state.config.PeerGossipSleepDuration)
|
|
select {
|
|
case <-timer.C:
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
continue OUTER_LOOP
|
|
}
|
|
}
|
|
|
|
// pickSendVote picks a vote and sends it to the peer. It will return true if
|
|
// there is a vote to send and false otherwise.
|
|
func (r *Reactor) pickSendVote(ctx context.Context, ps *PeerState, votes types.VoteSetReader) (bool, error) {
|
|
vote, ok := ps.PickVoteToSend(votes)
|
|
if !ok {
|
|
return false, nil
|
|
}
|
|
|
|
r.logger.Debug("sending vote message", "ps", ps, "vote", vote)
|
|
if err := r.voteCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.Vote{
|
|
Vote: vote.ToProto(),
|
|
},
|
|
}); err != nil {
|
|
return false, err
|
|
}
|
|
|
|
ps.SetHasVote(vote)
|
|
return true, nil
|
|
}
|
|
|
|
func (r *Reactor) gossipVotesForHeight(
|
|
ctx context.Context,
|
|
rs *cstypes.RoundState,
|
|
prs *cstypes.PeerRoundState,
|
|
ps *PeerState,
|
|
) (bool, error) {
|
|
logger := r.logger.With("height", prs.Height).With("peer", ps.peerID)
|
|
|
|
// if there are lastCommits to send...
|
|
if prs.Step == cstypes.RoundStepNewHeight {
|
|
if ok, err := r.pickSendVote(ctx, ps, rs.LastCommit); err != nil {
|
|
return false, err
|
|
} else if ok {
|
|
logger.Debug("picked rs.LastCommit to send")
|
|
return true, nil
|
|
|
|
}
|
|
}
|
|
|
|
// if there are POL prevotes to send...
|
|
if prs.Step <= cstypes.RoundStepPropose && prs.Round != -1 && prs.Round <= rs.Round && prs.ProposalPOLRound != -1 {
|
|
if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil {
|
|
if ok, err := r.pickSendVote(ctx, ps, polPrevotes); err != nil {
|
|
return false, err
|
|
} else if ok {
|
|
logger.Debug("picked rs.Prevotes(prs.ProposalPOLRound) to send", "round", prs.ProposalPOLRound)
|
|
return true, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// if there are prevotes to send...
|
|
if prs.Step <= cstypes.RoundStepPrevoteWait && prs.Round != -1 && prs.Round <= rs.Round {
|
|
if ok, err := r.pickSendVote(ctx, ps, rs.Votes.Prevotes(prs.Round)); err != nil {
|
|
return false, err
|
|
} else if ok {
|
|
logger.Debug("picked rs.Prevotes(prs.Round) to send", "round", prs.Round)
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
// if there are precommits to send...
|
|
if prs.Step <= cstypes.RoundStepPrecommitWait && prs.Round != -1 && prs.Round <= rs.Round {
|
|
if ok, err := r.pickSendVote(ctx, ps, rs.Votes.Precommits(prs.Round)); err != nil {
|
|
return false, err
|
|
} else if ok {
|
|
logger.Debug("picked rs.Precommits(prs.Round) to send", "round", prs.Round)
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
// if there are prevotes to send...(which are needed because of validBlock mechanism)
|
|
if prs.Round != -1 && prs.Round <= rs.Round {
|
|
if ok, err := r.pickSendVote(ctx, ps, rs.Votes.Prevotes(prs.Round)); err != nil {
|
|
return false, err
|
|
} else if ok {
|
|
logger.Debug("picked rs.Prevotes(prs.Round) to send", "round", prs.Round)
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
// if there are POLPrevotes to send...
|
|
if prs.ProposalPOLRound != -1 {
|
|
if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil {
|
|
if ok, err := r.pickSendVote(ctx, ps, polPrevotes); err != nil {
|
|
return false, err
|
|
} else if ok {
|
|
logger.Debug("picked rs.Prevotes(prs.ProposalPOLRound) to send", "round", prs.ProposalPOLRound)
|
|
return true, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
func (r *Reactor) gossipVotesRoutine(ctx context.Context, ps *PeerState) {
|
|
logger := r.logger.With("peer", ps.peerID)
|
|
|
|
defer ps.broadcastWG.Done()
|
|
|
|
// XXX: simple hack to throttle logs upon sleep
|
|
logThrottle := 0
|
|
|
|
timer := time.NewTimer(0)
|
|
defer timer.Stop()
|
|
|
|
OUTER_LOOP:
|
|
for {
|
|
if !r.IsRunning() {
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ps.closer.Done():
|
|
// The peer is marked for removal via a PeerUpdate as the doneCh was
|
|
// explicitly closed to signal we should exit.
|
|
return
|
|
|
|
default:
|
|
}
|
|
|
|
rs := r.state.GetRoundState()
|
|
prs := ps.GetRoundState()
|
|
|
|
switch logThrottle {
|
|
case 1: // first sleep
|
|
logThrottle = 2
|
|
case 2: // no more sleep
|
|
logThrottle = 0
|
|
}
|
|
|
|
// if height matches, then send LastCommit, Prevotes, and Precommits
|
|
if rs.Height == prs.Height {
|
|
if ok, err := r.gossipVotesForHeight(ctx, rs, prs, ps); err != nil {
|
|
return
|
|
} else if ok {
|
|
continue OUTER_LOOP
|
|
}
|
|
}
|
|
|
|
// special catchup logic -- if peer is lagging by height 1, send LastCommit
|
|
if prs.Height != 0 && rs.Height == prs.Height+1 {
|
|
if ok, err := r.pickSendVote(ctx, ps, rs.LastCommit); err != nil {
|
|
return
|
|
} else if ok {
|
|
logger.Debug("picked rs.LastCommit to send", "height", prs.Height)
|
|
continue OUTER_LOOP
|
|
}
|
|
}
|
|
|
|
// catchup logic -- if peer is lagging by more than 1, send Commit
|
|
blockStoreBase := r.state.blockStore.Base()
|
|
if blockStoreBase > 0 && prs.Height != 0 && rs.Height >= prs.Height+2 && prs.Height >= blockStoreBase {
|
|
// Load the block commit for prs.Height, which contains precommit
|
|
// signatures for prs.Height.
|
|
if commit := r.state.blockStore.LoadBlockCommit(prs.Height); commit != nil {
|
|
if ok, err := r.pickSendVote(ctx, ps, commit); err != nil {
|
|
return
|
|
} else if ok {
|
|
logger.Debug("picked Catchup commit to send", "height", prs.Height)
|
|
continue OUTER_LOOP
|
|
}
|
|
}
|
|
}
|
|
|
|
if logThrottle == 0 {
|
|
// we sent nothing -- sleep
|
|
logThrottle = 1
|
|
logger.Debug(
|
|
"no votes to send; sleeping",
|
|
"rs.Height", rs.Height,
|
|
"prs.Height", prs.Height,
|
|
"localPV", rs.Votes.Prevotes(rs.Round).BitArray(), "peerPV", prs.Prevotes,
|
|
"localPC", rs.Votes.Precommits(rs.Round).BitArray(), "peerPC", prs.Precommits,
|
|
)
|
|
} else if logThrottle == 2 {
|
|
logThrottle = 1
|
|
}
|
|
|
|
timer.Reset(r.state.config.PeerGossipSleepDuration)
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-timer.C:
|
|
}
|
|
continue OUTER_LOOP
|
|
}
|
|
}
|
|
|
|
// NOTE: `queryMaj23Routine` has a simple crude design since it only comes
|
|
// into play for liveness when there's a signature DDoS attack happening.
|
|
func (r *Reactor) queryMaj23Routine(ctx context.Context, ps *PeerState) {
|
|
defer ps.broadcastWG.Done()
|
|
|
|
timer := time.NewTimer(0)
|
|
defer timer.Stop()
|
|
|
|
OUTER_LOOP:
|
|
for {
|
|
if !r.IsRunning() {
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ps.closer.Done():
|
|
// The peer is marked for removal via a PeerUpdate as the doneCh was
|
|
// explicitly closed to signal we should exit.
|
|
return
|
|
default:
|
|
}
|
|
|
|
// maybe send Height/Round/Prevotes
|
|
{
|
|
rs := r.state.GetRoundState()
|
|
prs := ps.GetRoundState()
|
|
|
|
if rs.Height == prs.Height {
|
|
if maj23, ok := rs.Votes.Prevotes(prs.Round).TwoThirdsMajority(); ok {
|
|
if err := r.stateCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.VoteSetMaj23{
|
|
Height: prs.Height,
|
|
Round: prs.Round,
|
|
Type: tmproto.PrevoteType,
|
|
BlockID: maj23.ToProto(),
|
|
},
|
|
}); err != nil {
|
|
return
|
|
}
|
|
|
|
timer.Reset(r.state.config.PeerQueryMaj23SleepDuration)
|
|
select {
|
|
case <-timer.C:
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// maybe send Height/Round/Precommits
|
|
{
|
|
rs := r.state.GetRoundState()
|
|
prs := ps.GetRoundState()
|
|
|
|
if rs.Height == prs.Height {
|
|
if maj23, ok := rs.Votes.Precommits(prs.Round).TwoThirdsMajority(); ok {
|
|
if err := r.stateCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.VoteSetMaj23{
|
|
Height: prs.Height,
|
|
Round: prs.Round,
|
|
Type: tmproto.PrecommitType,
|
|
BlockID: maj23.ToProto(),
|
|
},
|
|
}); err != nil {
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-timer.C:
|
|
timer.Reset(r.state.config.PeerQueryMaj23SleepDuration)
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// maybe send Height/Round/ProposalPOL
|
|
{
|
|
rs := r.state.GetRoundState()
|
|
prs := ps.GetRoundState()
|
|
|
|
if rs.Height == prs.Height && prs.ProposalPOLRound >= 0 {
|
|
if maj23, ok := rs.Votes.Prevotes(prs.ProposalPOLRound).TwoThirdsMajority(); ok {
|
|
if err := r.stateCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.VoteSetMaj23{
|
|
Height: prs.Height,
|
|
Round: prs.ProposalPOLRound,
|
|
Type: tmproto.PrevoteType,
|
|
BlockID: maj23.ToProto(),
|
|
},
|
|
}); err != nil {
|
|
return
|
|
}
|
|
|
|
timer.Reset(r.state.config.PeerQueryMaj23SleepDuration)
|
|
select {
|
|
case <-timer.C:
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Little point sending LastCommitRound/LastCommit, these are fleeting and
|
|
// non-blocking.
|
|
|
|
// maybe send Height/CatchupCommitRound/CatchupCommit
|
|
{
|
|
prs := ps.GetRoundState()
|
|
|
|
if prs.CatchupCommitRound != -1 && prs.Height > 0 && prs.Height <= r.state.blockStore.Height() &&
|
|
prs.Height >= r.state.blockStore.Base() {
|
|
if commit := r.state.LoadCommit(prs.Height); commit != nil {
|
|
if err := r.stateCh.Send(ctx, p2p.Envelope{
|
|
To: ps.peerID,
|
|
Message: &tmcons.VoteSetMaj23{
|
|
Height: prs.Height,
|
|
Round: commit.Round,
|
|
Type: tmproto.PrecommitType,
|
|
BlockID: commit.BlockID.ToProto(),
|
|
},
|
|
}); err != nil {
|
|
return
|
|
}
|
|
|
|
timer.Reset(r.state.config.PeerQueryMaj23SleepDuration)
|
|
select {
|
|
case <-timer.C:
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
timer.Reset(r.state.config.PeerQueryMaj23SleepDuration)
|
|
select {
|
|
case <-timer.C:
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
|
|
continue OUTER_LOOP
|
|
}
|
|
}
|
|
|
|
// processPeerUpdate process a peer update message. For new or reconnected peers,
|
|
// we create a peer state if one does not exist for the peer, which should always
|
|
// be the case, and we spawn all the relevant goroutine to broadcast messages to
|
|
// the peer. During peer removal, we remove the peer for our set of peers and
|
|
// signal to all spawned goroutines to gracefully exit in a non-blocking manner.
|
|
func (r *Reactor) processPeerUpdate(ctx context.Context, peerUpdate p2p.PeerUpdate) {
|
|
r.logger.Debug("received peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)
|
|
|
|
r.mtx.Lock()
|
|
defer r.mtx.Unlock()
|
|
|
|
switch peerUpdate.Status {
|
|
case p2p.PeerStatusUp:
|
|
// Do not allow starting new broadcasting goroutines after reactor shutdown
|
|
// has been initiated. This can happen after we've manually closed all
|
|
// peer goroutines, but the router still sends in-flight peer updates.
|
|
if !r.IsRunning() {
|
|
return
|
|
}
|
|
|
|
var (
|
|
ps *PeerState
|
|
ok bool
|
|
)
|
|
|
|
ps, ok = r.peers[peerUpdate.NodeID]
|
|
if !ok {
|
|
ps = NewPeerState(r.logger, peerUpdate.NodeID)
|
|
r.peers[peerUpdate.NodeID] = ps
|
|
}
|
|
|
|
if !ps.IsRunning() {
|
|
// Set the peer state's closer to signal to all spawned goroutines to exit
|
|
// when the peer is removed. We also set the running state to ensure we
|
|
// do not spawn multiple instances of the same goroutines and finally we
|
|
// set the waitgroup counter so we know when all goroutines have exited.
|
|
ps.broadcastWG.Add(3)
|
|
ps.SetRunning(true)
|
|
|
|
// start goroutines for this peer
|
|
go r.gossipDataRoutine(ctx, ps)
|
|
go r.gossipVotesRoutine(ctx, ps)
|
|
go r.queryMaj23Routine(ctx, ps)
|
|
|
|
// Send our state to the peer. If we're block-syncing, broadcast a
|
|
// RoundStepMessage later upon SwitchToConsensus().
|
|
if !r.waitSync {
|
|
go func() { _ = r.sendNewRoundStepMessage(ctx, ps.peerID) }()
|
|
}
|
|
}
|
|
|
|
case p2p.PeerStatusDown:
|
|
ps, ok := r.peers[peerUpdate.NodeID]
|
|
if ok && ps.IsRunning() {
|
|
// signal to all spawned goroutines for the peer to gracefully exit
|
|
ps.closer.Close()
|
|
|
|
go func() {
|
|
// Wait for all spawned broadcast goroutines to exit before marking the
|
|
// peer state as no longer running and removal from the peers map.
|
|
ps.broadcastWG.Wait()
|
|
|
|
r.mtx.Lock()
|
|
delete(r.peers, peerUpdate.NodeID)
|
|
r.mtx.Unlock()
|
|
|
|
ps.SetRunning(false)
|
|
}()
|
|
}
|
|
}
|
|
}
|
|
|
|
// handleStateMessage handles envelopes sent from peers on the StateChannel.
|
|
// An error is returned if the message is unrecognized or if validation fails.
|
|
// If we fail to find the peer state for the envelope sender, we perform a no-op
|
|
// and return. This can happen when we process the envelope after the peer is
|
|
// removed.
|
|
func (r *Reactor) handleStateMessage(ctx context.Context, envelope *p2p.Envelope, msgI Message) error {
|
|
ps, ok := r.GetPeerState(envelope.From)
|
|
if !ok || ps == nil {
|
|
r.logger.Debug("failed to find peer state", "peer", envelope.From, "ch_id", "StateChannel")
|
|
return nil
|
|
}
|
|
|
|
switch msg := envelope.Message.(type) {
|
|
case *tmcons.NewRoundStep:
|
|
r.state.mtx.RLock()
|
|
initialHeight := r.state.state.InitialHeight
|
|
r.state.mtx.RUnlock()
|
|
|
|
if err := msgI.(*NewRoundStepMessage).ValidateHeight(initialHeight); err != nil {
|
|
r.logger.Error("peer sent us an invalid msg", "msg", msg, "err", err)
|
|
return err
|
|
}
|
|
|
|
ps.ApplyNewRoundStepMessage(msgI.(*NewRoundStepMessage))
|
|
|
|
case *tmcons.NewValidBlock:
|
|
ps.ApplyNewValidBlockMessage(msgI.(*NewValidBlockMessage))
|
|
|
|
case *tmcons.HasVote:
|
|
ps.ApplyHasVoteMessage(msgI.(*HasVoteMessage))
|
|
|
|
case *tmcons.VoteSetMaj23:
|
|
r.state.mtx.RLock()
|
|
height, votes := r.state.Height, r.state.Votes
|
|
r.state.mtx.RUnlock()
|
|
|
|
if height != msg.Height {
|
|
return nil
|
|
}
|
|
|
|
vsmMsg := msgI.(*VoteSetMaj23Message)
|
|
|
|
// peer claims to have a maj23 for some BlockID at <H,R,S>
|
|
err := votes.SetPeerMaj23(msg.Round, msg.Type, ps.peerID, vsmMsg.BlockID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Respond with a VoteSetBitsMessage showing which votes we have and
|
|
// consequently shows which we don't have.
|
|
var ourVotes *bits.BitArray
|
|
switch vsmMsg.Type {
|
|
case tmproto.PrevoteType:
|
|
ourVotes = votes.Prevotes(msg.Round).BitArrayByBlockID(vsmMsg.BlockID)
|
|
|
|
case tmproto.PrecommitType:
|
|
ourVotes = votes.Precommits(msg.Round).BitArrayByBlockID(vsmMsg.BlockID)
|
|
|
|
default:
|
|
panic("bad VoteSetBitsMessage field type; forgot to add a check in ValidateBasic?")
|
|
}
|
|
|
|
eMsg := &tmcons.VoteSetBits{
|
|
Height: msg.Height,
|
|
Round: msg.Round,
|
|
Type: msg.Type,
|
|
BlockID: msg.BlockID,
|
|
}
|
|
|
|
if votesProto := ourVotes.ToProto(); votesProto != nil {
|
|
eMsg.Votes = *votesProto
|
|
}
|
|
|
|
if err := r.voteSetBitsCh.Send(ctx, p2p.Envelope{
|
|
To: envelope.From,
|
|
Message: eMsg,
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
default:
|
|
return fmt.Errorf("received unknown message on StateChannel: %T", msg)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// handleDataMessage handles envelopes sent from peers on the DataChannel. If we
|
|
// fail to find the peer state for the envelope sender, we perform a no-op and
|
|
// return. This can happen when we process the envelope after the peer is
|
|
// removed.
|
|
func (r *Reactor) handleDataMessage(ctx context.Context, envelope *p2p.Envelope, msgI Message) error {
|
|
logger := r.logger.With("peer", envelope.From, "ch_id", "DataChannel")
|
|
|
|
ps, ok := r.GetPeerState(envelope.From)
|
|
if !ok || ps == nil {
|
|
r.logger.Debug("failed to find peer state")
|
|
return nil
|
|
}
|
|
|
|
if r.WaitSync() {
|
|
logger.Info("ignoring message received during sync", "msg", fmt.Sprintf("%T", msgI))
|
|
return nil
|
|
}
|
|
|
|
switch msg := envelope.Message.(type) {
|
|
case *tmcons.Proposal:
|
|
pMsg := msgI.(*ProposalMessage)
|
|
|
|
ps.SetHasProposal(pMsg.Proposal)
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case r.state.peerMsgQueue <- msgInfo{pMsg, envelope.From, tmtime.Now()}:
|
|
}
|
|
case *tmcons.ProposalPOL:
|
|
ps.ApplyProposalPOLMessage(msgI.(*ProposalPOLMessage))
|
|
case *tmcons.BlockPart:
|
|
bpMsg := msgI.(*BlockPartMessage)
|
|
|
|
ps.SetHasProposalBlockPart(bpMsg.Height, bpMsg.Round, int(bpMsg.Part.Index))
|
|
r.Metrics.BlockParts.With("peer_id", string(envelope.From)).Add(1)
|
|
select {
|
|
case r.state.peerMsgQueue <- msgInfo{bpMsg, envelope.From, tmtime.Now()}:
|
|
return nil
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
}
|
|
|
|
default:
|
|
return fmt.Errorf("received unknown message on DataChannel: %T", msg)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// handleVoteMessage handles envelopes sent from peers on the VoteChannel. If we
|
|
// fail to find the peer state for the envelope sender, we perform a no-op and
|
|
// return. This can happen when we process the envelope after the peer is
|
|
// removed.
|
|
func (r *Reactor) handleVoteMessage(ctx context.Context, envelope *p2p.Envelope, msgI Message) error {
|
|
logger := r.logger.With("peer", envelope.From, "ch_id", "VoteChannel")
|
|
|
|
ps, ok := r.GetPeerState(envelope.From)
|
|
if !ok || ps == nil {
|
|
r.logger.Debug("failed to find peer state")
|
|
return nil
|
|
}
|
|
|
|
if r.WaitSync() {
|
|
logger.Info("ignoring message received during sync", "msg", msgI)
|
|
return nil
|
|
}
|
|
|
|
switch msg := envelope.Message.(type) {
|
|
case *tmcons.Vote:
|
|
r.state.mtx.RLock()
|
|
height, valSize, lastCommitSize := r.state.Height, r.state.Validators.Size(), r.state.LastCommit.Size()
|
|
r.state.mtx.RUnlock()
|
|
|
|
vMsg := msgI.(*VoteMessage)
|
|
|
|
ps.EnsureVoteBitArrays(height, valSize)
|
|
ps.EnsureVoteBitArrays(height-1, lastCommitSize)
|
|
ps.SetHasVote(vMsg.Vote)
|
|
|
|
select {
|
|
case r.state.peerMsgQueue <- msgInfo{vMsg, envelope.From, tmtime.Now()}:
|
|
return nil
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
}
|
|
default:
|
|
return fmt.Errorf("received unknown message on VoteChannel: %T", msg)
|
|
}
|
|
}
|
|
|
|
// handleVoteSetBitsMessage handles envelopes sent from peers on the
|
|
// VoteSetBitsChannel. If we fail to find the peer state for the envelope sender,
|
|
// we perform a no-op and return. This can happen when we process the envelope
|
|
// after the peer is removed.
|
|
func (r *Reactor) handleVoteSetBitsMessage(ctx context.Context, envelope *p2p.Envelope, msgI Message) error {
|
|
logger := r.logger.With("peer", envelope.From, "ch_id", "VoteSetBitsChannel")
|
|
|
|
ps, ok := r.GetPeerState(envelope.From)
|
|
if !ok || ps == nil {
|
|
r.logger.Debug("failed to find peer state")
|
|
return nil
|
|
}
|
|
|
|
if r.WaitSync() {
|
|
logger.Info("ignoring message received during sync", "msg", msgI)
|
|
return nil
|
|
}
|
|
|
|
switch msg := envelope.Message.(type) {
|
|
case *tmcons.VoteSetBits:
|
|
r.state.mtx.RLock()
|
|
height, votes := r.state.Height, r.state.Votes
|
|
r.state.mtx.RUnlock()
|
|
|
|
vsbMsg := msgI.(*VoteSetBitsMessage)
|
|
|
|
if height == msg.Height {
|
|
var ourVotes *bits.BitArray
|
|
|
|
switch msg.Type {
|
|
case tmproto.PrevoteType:
|
|
ourVotes = votes.Prevotes(msg.Round).BitArrayByBlockID(vsbMsg.BlockID)
|
|
|
|
case tmproto.PrecommitType:
|
|
ourVotes = votes.Precommits(msg.Round).BitArrayByBlockID(vsbMsg.BlockID)
|
|
|
|
default:
|
|
panic("bad VoteSetBitsMessage field type; forgot to add a check in ValidateBasic?")
|
|
}
|
|
|
|
ps.ApplyVoteSetBitsMessage(vsbMsg, ourVotes)
|
|
} else {
|
|
ps.ApplyVoteSetBitsMessage(vsbMsg, nil)
|
|
}
|
|
|
|
default:
|
|
return fmt.Errorf("received unknown message on VoteSetBitsChannel: %T", msg)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// handleMessage handles an Envelope sent from a peer on a specific p2p Channel.
|
|
// It will handle errors and any possible panics gracefully. A caller can handle
|
|
// any error returned by sending a PeerError on the respective channel.
|
|
//
|
|
// NOTE: We process these messages even when we're block syncing. Messages affect
|
|
// either a peer state or the consensus state. Peer state updates can happen in
|
|
// parallel, but processing of proposals, block parts, and votes are ordered by
|
|
// the p2p channel.
|
|
//
|
|
// NOTE: We block on consensus state for proposals, block parts, and votes.
|
|
func (r *Reactor) handleMessage(ctx context.Context, chID p2p.ChannelID, envelope *p2p.Envelope) (err error) {
|
|
defer func() {
|
|
if e := recover(); e != nil {
|
|
err = fmt.Errorf("panic in processing message: %v", e)
|
|
r.logger.Error(
|
|
"recovering from processing message panic",
|
|
"err", err,
|
|
"stack", string(debug.Stack()),
|
|
)
|
|
}
|
|
}()
|
|
|
|
// We wrap the envelope's message in a Proto wire type so we can convert back
|
|
// the domain type that individual channel message handlers can work with. We
|
|
// do this here once to avoid having to do it for each individual message type.
|
|
// and because a large part of the core business logic depends on these
|
|
// domain types opposed to simply working with the Proto types.
|
|
protoMsg := new(tmcons.Message)
|
|
if err := protoMsg.Wrap(envelope.Message); err != nil {
|
|
return err
|
|
}
|
|
|
|
msgI, err := MsgFromProto(protoMsg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
r.logger.Debug("received message", "ch_id", chID, "message", msgI, "peer", envelope.From)
|
|
|
|
switch chID {
|
|
case StateChannel:
|
|
err = r.handleStateMessage(ctx, envelope, msgI)
|
|
|
|
case DataChannel:
|
|
err = r.handleDataMessage(ctx, envelope, msgI)
|
|
|
|
case VoteChannel:
|
|
err = r.handleVoteMessage(ctx, envelope, msgI)
|
|
|
|
case VoteSetBitsChannel:
|
|
err = r.handleVoteSetBitsMessage(ctx, envelope, msgI)
|
|
|
|
default:
|
|
err = fmt.Errorf("unknown channel ID (%d) for envelope (%v)", chID, envelope)
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
// processStateCh initiates a blocking process where we listen for and handle
|
|
// envelopes on the StateChannel. Any error encountered during message
|
|
// execution will result in a PeerError being sent on the StateChannel. When
|
|
// the reactor is stopped, we will catch the signal and close the p2p Channel
|
|
// gracefully.
|
|
func (r *Reactor) processStateCh(ctx context.Context) {
|
|
iter := r.stateCh.Receive(ctx)
|
|
for iter.Next(ctx) {
|
|
envelope := iter.Envelope()
|
|
if err := r.handleMessage(ctx, r.stateCh.ID, envelope); err != nil {
|
|
r.logger.Error("failed to process message", "ch_id", r.stateCh.ID, "envelope", envelope, "err", err)
|
|
if serr := r.stateCh.SendError(ctx, p2p.PeerError{
|
|
NodeID: envelope.From,
|
|
Err: err,
|
|
}); serr != nil {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// processDataCh initiates a blocking process where we listen for and handle
|
|
// envelopes on the DataChannel. Any error encountered during message
|
|
// execution will result in a PeerError being sent on the DataChannel. When
|
|
// the reactor is stopped, we will catch the signal and close the p2p Channel
|
|
// gracefully.
|
|
func (r *Reactor) processDataCh(ctx context.Context) {
|
|
iter := r.dataCh.Receive(ctx)
|
|
for iter.Next(ctx) {
|
|
envelope := iter.Envelope()
|
|
if err := r.handleMessage(ctx, r.dataCh.ID, envelope); err != nil {
|
|
r.logger.Error("failed to process message", "ch_id", r.dataCh.ID, "envelope", envelope, "err", err)
|
|
if serr := r.dataCh.SendError(ctx, p2p.PeerError{
|
|
NodeID: envelope.From,
|
|
Err: err,
|
|
}); serr != nil {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// processVoteCh initiates a blocking process where we listen for and handle
|
|
// envelopes on the VoteChannel. Any error encountered during message
|
|
// execution will result in a PeerError being sent on the VoteChannel. When
|
|
// the reactor is stopped, we will catch the signal and close the p2p Channel
|
|
// gracefully.
|
|
func (r *Reactor) processVoteCh(ctx context.Context) {
|
|
iter := r.voteCh.Receive(ctx)
|
|
for iter.Next(ctx) {
|
|
envelope := iter.Envelope()
|
|
if err := r.handleMessage(ctx, r.voteCh.ID, envelope); err != nil {
|
|
r.logger.Error("failed to process message", "ch_id", r.voteCh.ID, "envelope", envelope, "err", err)
|
|
if serr := r.voteCh.SendError(ctx, p2p.PeerError{
|
|
NodeID: envelope.From,
|
|
Err: err,
|
|
}); serr != nil {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// processVoteCh initiates a blocking process where we listen for and handle
|
|
// envelopes on the VoteSetBitsChannel. Any error encountered during message
|
|
// execution will result in a PeerError being sent on the VoteSetBitsChannel.
|
|
// When the reactor is stopped, we will catch the signal and close the p2p
|
|
// Channel gracefully.
|
|
func (r *Reactor) processVoteSetBitsCh(ctx context.Context) {
|
|
iter := r.voteSetBitsCh.Receive(ctx)
|
|
for iter.Next(ctx) {
|
|
envelope := iter.Envelope()
|
|
|
|
if err := r.handleMessage(ctx, r.voteSetBitsCh.ID, envelope); err != nil {
|
|
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
|
return
|
|
}
|
|
|
|
r.logger.Error("failed to process message", "ch_id", r.voteSetBitsCh.ID, "envelope", envelope, "err", err)
|
|
if serr := r.voteSetBitsCh.SendError(ctx, p2p.PeerError{
|
|
NodeID: envelope.From,
|
|
Err: err,
|
|
}); serr != nil {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// processPeerUpdates initiates a blocking process where we listen for and handle
|
|
// PeerUpdate messages. When the reactor is stopped, we will catch the signal and
|
|
// close the p2p PeerUpdatesCh gracefully.
|
|
func (r *Reactor) processPeerUpdates(ctx context.Context) {
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case peerUpdate := <-r.peerUpdates.Updates():
|
|
r.processPeerUpdate(ctx, peerUpdate)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Reactor) peerStatsRoutine(ctx context.Context) {
|
|
for {
|
|
if !r.IsRunning() {
|
|
r.logger.Info("stopping peerStatsRoutine")
|
|
return
|
|
}
|
|
|
|
select {
|
|
case msg := <-r.state.statsMsgQueue:
|
|
ps, ok := r.GetPeerState(msg.PeerID)
|
|
if !ok || ps == nil {
|
|
r.logger.Debug("attempt to update stats for non-existent peer", "peer", msg.PeerID)
|
|
continue
|
|
}
|
|
|
|
switch msg.Msg.(type) {
|
|
case *VoteMessage:
|
|
if numVotes := ps.RecordVote(); numVotes%votesToContributeToBecomeGoodPeer == 0 {
|
|
r.peerUpdates.SendUpdate(ctx, p2p.PeerUpdate{
|
|
NodeID: msg.PeerID,
|
|
Status: p2p.PeerStatusGood,
|
|
})
|
|
}
|
|
|
|
case *BlockPartMessage:
|
|
if numParts := ps.RecordBlockPart(); numParts%blocksToContributeToBecomeGoodPeer == 0 {
|
|
r.peerUpdates.SendUpdate(ctx, p2p.PeerUpdate{
|
|
NodeID: msg.PeerID,
|
|
Status: p2p.PeerStatusGood,
|
|
})
|
|
}
|
|
}
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Reactor) GetConsensusState() *State {
|
|
return r.state
|
|
}
|
|
|
|
func (r *Reactor) SetStateSyncingMetrics(v float64) {
|
|
r.Metrics.StateSyncing.Set(v)
|
|
}
|
|
|
|
func (r *Reactor) SetBlockSyncingMetrics(v float64) {
|
|
r.Metrics.BlockSyncing.Set(v)
|
|
}
|