You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

232 lines
6.0 KiB

  1. package main
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "time"
  7. "github.com/tendermint/tendermint/libs/log"
  8. rpchttp "github.com/tendermint/tendermint/rpc/client/http"
  9. rpctypes "github.com/tendermint/tendermint/rpc/coretypes"
  10. e2e "github.com/tendermint/tendermint/test/e2e/pkg"
  11. "github.com/tendermint/tendermint/types"
  12. )
  13. // waitForHeight waits for the network to reach a certain height (or above),
  14. // returning the block at the height seen. Errors if the network is not making
  15. // progress at all.
  16. // If height == 0, the initial height of the test network is used as the target.
  17. func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
  18. var (
  19. err error
  20. clients = map[string]*rpchttp.HTTP{}
  21. lastHeight int64
  22. lastIncrease = time.Now()
  23. nodesAtHeight = map[string]struct{}{}
  24. numRunningNodes int
  25. )
  26. if height == 0 {
  27. height = testnet.InitialHeight
  28. }
  29. for _, node := range testnet.Nodes {
  30. if node.Stateless() {
  31. continue
  32. }
  33. if node.HasStarted {
  34. numRunningNodes++
  35. }
  36. }
  37. timer := time.NewTimer(0)
  38. defer timer.Stop()
  39. for {
  40. select {
  41. case <-ctx.Done():
  42. return nil, nil, ctx.Err()
  43. case <-timer.C:
  44. for _, node := range testnet.Nodes {
  45. // skip nodes that have reached the target height
  46. if _, ok := nodesAtHeight[node.Name]; ok {
  47. continue
  48. }
  49. // skip nodes that don't have state or haven't started yet
  50. if node.Stateless() {
  51. continue
  52. }
  53. if !node.HasStarted {
  54. continue
  55. }
  56. // cache the clients
  57. client, ok := clients[node.Name]
  58. if !ok {
  59. client, err = node.Client()
  60. if err != nil {
  61. continue
  62. }
  63. clients[node.Name] = client
  64. }
  65. result, err := client.Status(ctx)
  66. if err != nil {
  67. continue
  68. }
  69. if result.SyncInfo.LatestBlockHeight > lastHeight {
  70. lastHeight = result.SyncInfo.LatestBlockHeight
  71. lastIncrease = time.Now()
  72. }
  73. if result.SyncInfo.LatestBlockHeight >= height {
  74. // the node has achieved the target height!
  75. // add this node to the set of target
  76. // height nodes
  77. nodesAtHeight[node.Name] = struct{}{}
  78. // if not all of the nodes that we
  79. // have clients for have reached the
  80. // target height, keep trying.
  81. if numRunningNodes > len(nodesAtHeight) {
  82. continue
  83. }
  84. // All nodes are at or above the target height. Now fetch the block for that target height
  85. // and return it. We loop again through all clients because some may have pruning set but
  86. // at least two of them should be archive nodes.
  87. for _, c := range clients {
  88. result, err := c.Block(ctx, &height)
  89. if err != nil || result == nil || result.Block == nil {
  90. continue
  91. }
  92. return result.Block, &result.BlockID, err
  93. }
  94. }
  95. }
  96. if len(clients) == 0 {
  97. return nil, nil, errors.New("unable to connect to any network nodes")
  98. }
  99. if time.Since(lastIncrease) >= time.Minute {
  100. if lastHeight == 0 {
  101. return nil, nil, errors.New("chain stalled at unknown height (most likely upon starting)")
  102. }
  103. return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
  104. lastHeight,
  105. len(nodesAtHeight),
  106. numRunningNodes,
  107. nodesAtHeight)
  108. }
  109. timer.Reset(1 * time.Second)
  110. }
  111. }
  112. }
  113. // waitForNode waits for a node to become available and catch up to the given block height.
  114. func waitForNode(ctx context.Context, logger log.Logger, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
  115. // If the node is the light client or seed note, we do not check for the last height.
  116. // The light client and seed note can be behind the full node and validator
  117. if node.Mode == e2e.ModeSeed {
  118. return nil, nil
  119. }
  120. client, err := node.Client()
  121. if err != nil {
  122. return nil, err
  123. }
  124. timer := time.NewTimer(0)
  125. defer timer.Stop()
  126. var (
  127. lastFailed bool
  128. counter int
  129. )
  130. for {
  131. counter++
  132. if lastFailed {
  133. lastFailed = false
  134. // if there was a problem with the request in
  135. // the previous recreate the client to ensure
  136. // reconnection
  137. client, err = node.Client()
  138. if err != nil {
  139. return nil, err
  140. }
  141. }
  142. select {
  143. case <-ctx.Done():
  144. return nil, ctx.Err()
  145. case <-timer.C:
  146. status, err := client.Status(ctx)
  147. switch {
  148. case errors.Is(err, context.DeadlineExceeded):
  149. return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
  150. case errors.Is(err, context.Canceled):
  151. return nil, err
  152. // If the node is the light client, it is not essential to wait for it to catch up, but we must return status info
  153. case err == nil && node.Mode == e2e.ModeLight:
  154. return status, nil
  155. case err == nil && node.Mode != e2e.ModeLight && status.SyncInfo.LatestBlockHeight >= height:
  156. return status, nil
  157. case counter%500 == 0:
  158. switch {
  159. case err != nil:
  160. lastFailed = true
  161. logger.Error("node not yet ready",
  162. "iter", counter,
  163. "node", node.Name,
  164. "target", height,
  165. "err", err,
  166. )
  167. case status != nil:
  168. logger.Info("node not yet ready",
  169. "iter", counter,
  170. "node", node.Name,
  171. "height", status.SyncInfo.LatestBlockHeight,
  172. "target", height,
  173. )
  174. }
  175. }
  176. timer.Reset(250 * time.Millisecond)
  177. }
  178. }
  179. }
  180. // getLatestBlock returns the last block that all active nodes in the network have
  181. // agreed upon i.e. the earlist of each nodes latest block
  182. func getLatestBlock(ctx context.Context, testnet *e2e.Testnet) (*types.Block, error) {
  183. var earliestBlock *types.Block
  184. for _, node := range testnet.Nodes {
  185. // skip nodes that don't have state or haven't started yet
  186. if node.Stateless() {
  187. continue
  188. }
  189. if !node.HasStarted {
  190. continue
  191. }
  192. client, err := node.Client()
  193. if err != nil {
  194. return nil, err
  195. }
  196. wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
  197. defer cancel()
  198. result, err := client.Block(wctx, nil)
  199. if err != nil {
  200. return nil, err
  201. }
  202. if result.Block != nil && (earliestBlock == nil || earliestBlock.Height > result.Block.Height) {
  203. earliestBlock = result.Block
  204. }
  205. }
  206. return earliestBlock, nil
  207. }