You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

184 lines
4.3 KiB

package main
import (
"context"
"errors"
"fmt"
"time"
rpchttp "github.com/tendermint/tendermint/rpc/client/http"
rpctypes "github.com/tendermint/tendermint/rpc/core/types"
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
"github.com/tendermint/tendermint/types"
)
// waitForHeight waits for the network to reach a certain height (or above),
// returning the highest height seen. Errors if the network is not making
// progress at all.
func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
var (
err error
maxResult *rpctypes.ResultBlock
clients = map[string]*rpchttp.HTTP{}
lastIncrease = time.Now()
nodesAtHeight = map[string]struct{}{}
numRunningNodes int
)
for _, node := range testnet.Nodes {
if node.Stateless() {
continue
}
if node.HasStarted {
numRunningNodes++
}
}
timer := time.NewTimer(0)
defer timer.Stop()
for {
select {
case <-ctx.Done():
return nil, nil, ctx.Err()
case <-timer.C:
for _, node := range testnet.Nodes {
// skip nodes that have reached the target height
if _, ok := nodesAtHeight[node.Name]; ok {
continue
}
if node.Stateless() {
continue
}
if !node.HasStarted {
continue
}
// cache the clients
client, ok := clients[node.Name]
if !ok {
client, err = node.Client()
if err != nil {
continue
}
clients[node.Name] = client
}
wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
result, err := client.Block(wctx, nil)
if err != nil {
continue
}
if result.Block != nil && (maxResult == nil || result.Block.Height > maxResult.Block.Height) {
maxResult = result
lastIncrease = time.Now()
}
if maxResult != nil && maxResult.Block.Height >= height {
// the node has achieved the target height!
// add this node to the set of target
// height nodes
nodesAtHeight[node.Name] = struct{}{}
// if not all of the nodes that we
// have clients for have reached the
// target height, keep trying.
if numRunningNodes > len(nodesAtHeight) {
continue
}
// return once all nodes have reached
// the target height.
return maxResult.Block, &maxResult.BlockID, nil
}
}
if len(clients) == 0 {
return nil, nil, errors.New("unable to connect to any network nodes")
}
if time.Since(lastIncrease) >= time.Minute {
if maxResult == nil {
return nil, nil, errors.New("chain stalled at unknown height")
}
return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
maxResult.Block.Height,
len(nodesAtHeight),
numRunningNodes,
nodesAtHeight)
}
timer.Reset(1 * time.Second)
}
}
}
// waitForNode waits for a node to become available and catch up to the given block height.
func waitForNode(ctx context.Context, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
if node.Mode == e2e.ModeSeed {
return nil, nil
}
client, err := node.Client()
if err != nil {
return nil, err
}
timer := time.NewTimer(0)
defer timer.Stop()
var (
lastFailed bool
counter int
)
for {
counter++
if lastFailed {
lastFailed = false
// if there was a problem with the request in
// the previous recreate the client to ensure
// reconnection
client, err = node.Client()
if err != nil {
return nil, err
}
}
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-timer.C:
status, err := client.Status(ctx)
switch {
case errors.Is(err, context.DeadlineExceeded):
return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
case errors.Is(err, context.Canceled):
return nil, err
case err == nil && status.SyncInfo.LatestBlockHeight >= height:
return status, nil
case counter%100 == 0:
switch {
case err != nil:
lastFailed = true
logger.Error("node not yet ready",
"iter", counter,
"node", node.Name,
"err", err,
"target", height,
)
case status != nil:
logger.Error("node not yet ready",
"iter", counter,
"node", node.Name,
"height", status.SyncInfo.LatestBlockHeight,
"target", height,
)
}
}
timer.Reset(250 * time.Millisecond)
}
}
}