You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

458 lines
13 KiB

package v0
import (
"fmt"
"reflect"
"time"
bc "github.com/tendermint/tendermint/blockchain"
"github.com/tendermint/tendermint/libs/log"
"github.com/tendermint/tendermint/p2p"
bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain"
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/store"
"github.com/tendermint/tendermint/types"
)
const (
// BlockchainChannel is a channel for blocks and status updates (`BlockStore` height)
BlockchainChannel = byte(0x40)
trySyncIntervalMS = 10
// stop syncing when last block's time is
// within this much of the system time.
// stopSyncingDurationMinutes = 10
// ask for best height every 10s
statusUpdateIntervalSeconds = 10
// check if we should switch to consensus reactor
switchToConsensusIntervalSeconds = 1
// switch to consensus after this duration of inactivity
syncTimeout = 60 * time.Second
)
type consensusReactor interface {
// for when we switch from blockchain reactor and fast sync to
// the consensus machine
SwitchToConsensus(state sm.State, skipWAL bool)
}
type peerError struct {
err error
peerID p2p.ID
}
func (e peerError) Error() string {
return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error())
}
// BlockchainReactor handles long-term catchup syncing.
type BlockchainReactor struct {
p2p.BaseReactor
// immutable
initialState sm.State
blockExec *sm.BlockExecutor
store *store.BlockStore
pool *BlockPool
fastSync bool
requestsCh <-chan BlockRequest
errorsCh <-chan peerError
}
// NewBlockchainReactor returns new reactor instance.
func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore,
fastSync bool) *BlockchainReactor {
if state.LastBlockHeight != store.Height() {
panic(fmt.Sprintf("state (%v) and store (%v) height mismatch", state.LastBlockHeight,
store.Height()))
}
requestsCh := make(chan BlockRequest, maxTotalRequesters)
const capacity = 1000 // must be bigger than peers count
errorsCh := make(chan peerError, capacity) // so we don't block in #Receive#pool.AddBlock
startHeight := store.Height() + 1
if startHeight == 1 {
startHeight = state.InitialHeight
}
pool := NewBlockPool(startHeight, requestsCh, errorsCh)
bcR := &BlockchainReactor{
initialState: state,
blockExec: blockExec,
store: store,
pool: pool,
fastSync: fastSync,
requestsCh: requestsCh,
errorsCh: errorsCh,
}
bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR)
return bcR
}
// SetLogger implements service.Service by setting the logger on reactor and pool.
func (bcR *BlockchainReactor) SetLogger(l log.Logger) {
bcR.BaseService.Logger = l
bcR.pool.Logger = l
}
// OnStart implements service.Service.
func (bcR *BlockchainReactor) OnStart() error {
if bcR.fastSync {
err := bcR.pool.Start()
if err != nil {
return err
}
go bcR.poolRoutine(false)
}
return nil
}
// SwitchToFastSync is called by the state sync reactor when switching to fast sync.
func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error {
bcR.fastSync = true
bcR.initialState = state
bcR.pool.height = state.LastBlockHeight + 1
err := bcR.pool.Start()
if err != nil {
return err
}
go bcR.poolRoutine(true)
return nil
}
// OnStop implements service.Service.
func (bcR *BlockchainReactor) OnStop() {
if bcR.fastSync {
if err := bcR.pool.Stop(); err != nil {
bcR.Logger.Error("Error stopping pool", "err", err)
}
}
}
// GetChannels implements Reactor
func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor {
return []*p2p.ChannelDescriptor{
{
ID: BlockchainChannel,
Priority: 10,
SendQueueCapacity: 1000,
RecvBufferCapacity: 50 * 4096,
RecvMessageCapacity: bc.MaxMsgSize,
},
}
}
// AddPeer implements Reactor by sending our state to peer.
func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) {
msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{
Base: bcR.store.Base(),
Height: bcR.store.Height()})
if err != nil {
bcR.Logger.Error("could not convert msg to protobuf", "err", err)
return
}
_ = peer.Send(BlockchainChannel, msgBytes)
// it's OK if send fails. will try later in poolRoutine
// peer is added to the pool once we receive the first
// bcStatusResponseMessage from the peer and call pool.SetPeerRange
}
// RemovePeer implements Reactor by removing peer from the pool.
func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) {
bcR.pool.RemovePeer(peer.ID())
}
// respondToPeer loads a block and sends it to the requesting peer,
// if we have it. Otherwise, we'll respond saying we don't have it.
func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest,
src p2p.Peer) (queued bool) {
block := bcR.store.LoadBlock(msg.Height)
if block != nil {
bl, err := block.ToProto()
if err != nil {
bcR.Logger.Error("could not convert msg to protobuf", "err", err)
return false
}
msgBytes, err := bc.EncodeMsg(&bcproto.BlockResponse{Block: bl})
if err != nil {
bcR.Logger.Error("could not marshal msg", "err", err)
return false
}
return src.TrySend(BlockchainChannel, msgBytes)
}
bcR.Logger.Info("Peer asking for a block we don't have", "src", src, "height", msg.Height)
msgBytes, err := bc.EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height})
if err != nil {
bcR.Logger.Error("could not convert msg to protobuf", "err", err)
return false
}
return src.TrySend(BlockchainChannel, msgBytes)
}
// Receive implements Reactor by handling 4 types of messages (look below).
// XXX: do not call any methods that can block or incur heavy processing.
// https://github.com/tendermint/tendermint/issues/2888
func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
logger := bcR.Logger.With("src", src, "chId", chID)
msg, err := bc.DecodeMsg(msgBytes)
if err != nil {
logger.Error("Error decoding message", "err", err)
bcR.Switch.StopPeerForError(src, err)
return
}
if err = bc.ValidateMsg(msg); err != nil {
logger.Error("Peer sent us invalid msg", "msg", msg, "err", err)
bcR.Switch.StopPeerForError(src, err)
return
}
logger.Debug("Receive", "msg", msg)
switch msg := msg.(type) {
case *bcproto.BlockRequest:
bcR.respondToPeer(msg, src)
case *bcproto.BlockResponse:
bi, err := types.BlockFromProto(msg.Block)
if err != nil {
logger.Error("Block content is invalid", "err", err)
bcR.Switch.StopPeerForError(src, err)
return
}
bcR.pool.AddBlock(src.ID(), bi, len(msgBytes))
case *bcproto.StatusRequest:
// Send peer our state.
msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{
Height: bcR.store.Height(),
Base: bcR.store.Base(),
})
if err != nil {
logger.Error("could not convert msg to protobut", "err", err)
return
}
src.TrySend(BlockchainChannel, msgBytes)
case *bcproto.StatusResponse:
// Got a peer status. Unverified.
bcR.pool.SetPeerRange(src.ID(), msg.Base, msg.Height)
case *bcproto.NoBlockResponse:
logger.Debug("Peer does not have requested block", "height", msg.Height)
default:
logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
}
}
// Handle messages from the poolReactor telling the reactor what to do.
// NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
func (bcR *BlockchainReactor) poolRoutine(stateSynced bool) {
var (
trySyncTicker = time.NewTicker(trySyncIntervalMS * time.Millisecond)
statusUpdateTicker = time.NewTicker(statusUpdateIntervalSeconds * time.Second)
switchToConsensusTicker = time.NewTicker(switchToConsensusIntervalSeconds * time.Second)
blocksSynced = uint64(0)
chainID = bcR.initialState.ChainID
state = bcR.initialState
lastHundred = time.Now()
lastRate = 0.0
didProcessCh = make(chan struct{}, 1)
)
go func() {
for {
select {
case <-bcR.Quit():
return
case <-bcR.pool.Quit():
return
case request := <-bcR.requestsCh:
peer := bcR.Switch.Peers().Get(request.PeerID)
if peer == nil {
bcR.Logger.Debug("Can't send request: no peer", "peer_id", request.PeerID)
continue
}
msgBytes, err := bc.EncodeMsg(&bcproto.BlockRequest{Height: request.Height})
if err != nil {
bcR.Logger.Error("could not convert BlockRequest to proto", "err", err)
continue
}
queued := peer.TrySend(BlockchainChannel, msgBytes)
if !queued {
bcR.Logger.Debug("Send queue is full, drop block request", "peer", peer.ID(), "height", request.Height)
}
case err := <-bcR.errorsCh:
peer := bcR.Switch.Peers().Get(err.peerID)
if peer != nil {
bcR.Switch.StopPeerForError(peer, err)
}
case <-statusUpdateTicker.C:
// ask for status updates
go bcR.BroadcastStatusRequest()
}
}
}()
FOR_LOOP:
for {
select {
case <-switchToConsensusTicker.C:
var (
height, numPending, lenRequesters = bcR.pool.GetStatus()
outbound, inbound, _ = bcR.Switch.NumPeers()
lastAdvance = bcR.pool.LastAdvance()
)
bcR.Logger.Debug("Consensus ticker",
"numPending", numPending,
"total", lenRequesters)
switch {
case bcR.pool.IsCaughtUp():
bcR.Logger.Info("Time to switch to consensus reactor!", "height", height)
case time.Since(lastAdvance) > syncTimeout:
bcR.Logger.Error(fmt.Sprintf("No progress since last advance: %v", lastAdvance))
default:
bcR.Logger.Info("Not caught up yet",
"height", height, "max_peer_height", bcR.pool.MaxPeerHeight(),
"num_peers", outbound+inbound,
"timeout_in", syncTimeout-time.Since(lastAdvance))
continue
}
if err := bcR.pool.Stop(); err != nil {
bcR.Logger.Error("Error stopping pool", "err", err)
}
conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)
if ok {
conR.SwitchToConsensus(state, blocksSynced > 0 || stateSynced)
}
break FOR_LOOP
case <-trySyncTicker.C: // chan time
select {
case didProcessCh <- struct{}{}:
default:
}
case <-didProcessCh:
// NOTE: It is a subtle mistake to process more than a single block
// at a time (e.g. 10) here, because we only TrySend 1 request per
// loop. The ratio mismatch can result in starving of blocks, a
// sudden burst of requests and responses, and repeat.
// Consequently, it is better to split these routines rather than
// coupling them as it's written here. TODO uncouple from request
// routine.
// See if there are any blocks to sync.
first, second := bcR.pool.PeekTwoBlocks()
// bcR.Logger.Info("TrySync peeked", "first", first, "second", second)
if first == nil || second == nil {
// We need both to sync the first block.
continue FOR_LOOP
} else {
// Try again quickly next loop.
didProcessCh <- struct{}{}
}
var (
firstParts = first.MakePartSet(types.BlockPartSizeBytes)
firstPartSetHeader = firstParts.Header()
firstID = types.BlockID{Hash: first.Hash(), PartSetHeader: firstPartSetHeader}
)
// Finally, verify the first block using the second's commit
// NOTE: we can probably make this more efficient, but note that calling
// first.Hash() doesn't verify the tx contents, so MakePartSet() is
// currently necessary.
err := state.Validators.VerifyCommitLight(chainID, firstID, first.Height, second.LastCommit)
if err != nil {
err = fmt.Errorf("invalid last commit: %w", err)
bcR.Logger.Error(err.Error(),
"last_commit", second.LastCommit, "block_id", firstID, "height", first.Height)
peerID := bcR.pool.RedoRequest(first.Height)
peer := bcR.Switch.Peers().Get(peerID)
if peer != nil {
// NOTE: we've already removed the peer's request, but we still need
// to clean up the rest.
bcR.Switch.StopPeerForError(peer, err)
}
peerID2 := bcR.pool.RedoRequest(second.Height)
if peerID2 != peerID {
if peer2 := bcR.Switch.Peers().Get(peerID2); peer2 != nil {
bcR.Switch.StopPeerForError(peer2, err)
}
}
continue FOR_LOOP
} else {
bcR.pool.PopRequest()
// TODO: batch saves so we dont persist to disk every block
bcR.store.SaveBlock(first, firstParts, second.LastCommit)
// TODO: same thing for app - but we would need a way to get the hash
// without persisting the state.
var err error
state, _, err = bcR.blockExec.ApplyBlock(state, firstID, first)
if err != nil {
// TODO This is bad, are we zombie?
panic(fmt.Sprintf("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))
}
blocksSynced++
if blocksSynced%100 == 0 {
lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
bcR.Logger.Info("Fast Sync Rate",
"height", bcR.pool.height, "max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)
lastHundred = time.Now()
}
}
continue FOR_LOOP
case <-bcR.Quit():
break FOR_LOOP
}
}
}
// BroadcastStatusRequest broadcasts `BlockStore` base and height.
func (bcR *BlockchainReactor) BroadcastStatusRequest() {
bm, err := bc.EncodeMsg(&bcproto.StatusRequest{})
if err != nil {
bcR.Logger.Error("could not convert StatusRequest to proto", "err", err)
return
}
// We don't care about whenever broadcast is successful or not.
_ = bcR.Switch.Broadcast(BlockchainChannel, bm)
}