You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

228 lines
5.6 KiB

package main
import (
"context"
"errors"
"fmt"
"time"
rpchttp "github.com/tendermint/tendermint/rpc/client/http"
rpctypes "github.com/tendermint/tendermint/rpc/coretypes"
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
"github.com/tendermint/tendermint/types"
)
// waitForHeight waits for the network to reach a certain height (or above),
// returning the block at the height seen. Errors if the network is not making
// progress at all.
// If height == 0, the initial height of the test network is used as the target.
func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
var (
err error
clients = map[string]*rpchttp.HTTP{}
lastHeight int64
lastIncrease = time.Now()
nodesAtHeight = map[string]struct{}{}
numRunningNodes int
)
if height == 0 {
height = testnet.InitialHeight
}
for _, node := range testnet.Nodes {
if node.Stateless() {
continue
}
if node.HasStarted {
numRunningNodes++
}
}
timer := time.NewTimer(0)
defer timer.Stop()
for {
select {
case <-ctx.Done():
return nil, nil, ctx.Err()
case <-timer.C:
for _, node := range testnet.Nodes {
// skip nodes that have reached the target height
if _, ok := nodesAtHeight[node.Name]; ok {
continue
}
// skip nodes that don't have state or haven't started yet
if node.Stateless() {
continue
}
if !node.HasStarted {
continue
}
// cache the clients
client, ok := clients[node.Name]
if !ok {
client, err = node.Client()
if err != nil {
continue
}
clients[node.Name] = client
}
wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
result, err := client.Status(wctx)
if err != nil {
continue
}
if result.SyncInfo.LatestBlockHeight > lastHeight {
lastHeight = result.SyncInfo.LatestBlockHeight
lastIncrease = time.Now()
}
if result.SyncInfo.LatestBlockHeight >= height {
// the node has achieved the target height!
// add this node to the set of target
// height nodes
nodesAtHeight[node.Name] = struct{}{}
// if not all of the nodes that we
// have clients for have reached the
// target height, keep trying.
if numRunningNodes > len(nodesAtHeight) {
continue
}
// All nodes are at or above the target height. Now fetch the block for that target height
// and return it. We loop again through all clients because some may have pruning set but
// at least two of them should be archive nodes.
for _, c := range clients {
result, err := c.Block(ctx, &height)
if err != nil || result == nil || result.Block == nil {
continue
}
return result.Block, &result.BlockID, err
}
}
}
if len(clients) == 0 {
return nil, nil, errors.New("unable to connect to any network nodes")
}
if time.Since(lastIncrease) >= time.Minute {
if lastHeight == 0 {
return nil, nil, errors.New("chain stalled at unknown height (most likely upon starting)")
}
return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
lastHeight,
len(nodesAtHeight),
numRunningNodes,
nodesAtHeight)
}
timer.Reset(1 * time.Second)
}
}
}
// waitForNode waits for a node to become available and catch up to the given block height.
func waitForNode(ctx context.Context, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
if node.Mode == e2e.ModeSeed {
return nil, nil
}
client, err := node.Client()
if err != nil {
return nil, err
}
timer := time.NewTimer(0)
defer timer.Stop()
var (
lastFailed bool
counter int
)
for {
counter++
if lastFailed {
lastFailed = false
// if there was a problem with the request in
// the previous recreate the client to ensure
// reconnection
client, err = node.Client()
if err != nil {
return nil, err
}
}
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-timer.C:
status, err := client.Status(ctx)
switch {
case errors.Is(err, context.DeadlineExceeded):
return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
case errors.Is(err, context.Canceled):
return nil, err
case err == nil && status.SyncInfo.LatestBlockHeight >= height:
return status, nil
case counter%500 == 0:
switch {
case err != nil:
lastFailed = true
logger.Error("node not yet ready",
"iter", counter,
"node", node.Name,
"target", height,
"err", err,
)
case status != nil:
logger.Info("node not yet ready",
"iter", counter,
"node", node.Name,
"height", status.SyncInfo.LatestBlockHeight,
"target", height,
)
}
}
timer.Reset(250 * time.Millisecond)
}
}
}
// getLatestBlock returns the last block that all active nodes in the network have
// agreed upon i.e. the earlist of each nodes latest block
func getLatestBlock(ctx context.Context, testnet *e2e.Testnet) (*types.Block, error) {
var earliestBlock *types.Block
for _, node := range testnet.Nodes {
// skip nodes that don't have state or haven't started yet
if node.Stateless() {
continue
}
if !node.HasStarted {
continue
}
client, err := node.Client()
if err != nil {
return nil, err
}
wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
result, err := client.Block(wctx, nil)
if err != nil {
return nil, err
}
if result.Block != nil && (earliestBlock == nil || earliestBlock.Height > result.Block.Height) {
earliestBlock = result.Block
}
}
return earliestBlock, nil
}