You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

231 lines
5.9 KiB

  1. package main
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "time"
  7. rpchttp "github.com/tendermint/tendermint/rpc/client/http"
  8. rpctypes "github.com/tendermint/tendermint/rpc/coretypes"
  9. e2e "github.com/tendermint/tendermint/test/e2e/pkg"
  10. "github.com/tendermint/tendermint/types"
  11. )
  12. // waitForHeight waits for the network to reach a certain height (or above),
  13. // returning the block at the height seen. Errors if the network is not making
  14. // progress at all.
  15. // If height == 0, the initial height of the test network is used as the target.
  16. func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
  17. var (
  18. err error
  19. clients = map[string]*rpchttp.HTTP{}
  20. lastHeight int64
  21. lastIncrease = time.Now()
  22. nodesAtHeight = map[string]struct{}{}
  23. numRunningNodes int
  24. )
  25. if height == 0 {
  26. height = testnet.InitialHeight
  27. }
  28. for _, node := range testnet.Nodes {
  29. if node.Stateless() {
  30. continue
  31. }
  32. if node.HasStarted {
  33. numRunningNodes++
  34. }
  35. }
  36. timer := time.NewTimer(0)
  37. defer timer.Stop()
  38. for {
  39. select {
  40. case <-ctx.Done():
  41. return nil, nil, ctx.Err()
  42. case <-timer.C:
  43. for _, node := range testnet.Nodes {
  44. // skip nodes that have reached the target height
  45. if _, ok := nodesAtHeight[node.Name]; ok {
  46. continue
  47. }
  48. // skip nodes that don't have state or haven't started yet
  49. if node.Stateless() {
  50. continue
  51. }
  52. if !node.HasStarted {
  53. continue
  54. }
  55. // cache the clients
  56. client, ok := clients[node.Name]
  57. if !ok {
  58. client, err = node.Client()
  59. if err != nil {
  60. continue
  61. }
  62. clients[node.Name] = client
  63. }
  64. result, err := client.Status(ctx)
  65. if err != nil {
  66. continue
  67. }
  68. if result.SyncInfo.LatestBlockHeight > lastHeight {
  69. lastHeight = result.SyncInfo.LatestBlockHeight
  70. lastIncrease = time.Now()
  71. }
  72. if result.SyncInfo.LatestBlockHeight >= height {
  73. // the node has achieved the target height!
  74. // add this node to the set of target
  75. // height nodes
  76. nodesAtHeight[node.Name] = struct{}{}
  77. // if not all of the nodes that we
  78. // have clients for have reached the
  79. // target height, keep trying.
  80. if numRunningNodes > len(nodesAtHeight) {
  81. continue
  82. }
  83. // All nodes are at or above the target height. Now fetch the block for that target height
  84. // and return it. We loop again through all clients because some may have pruning set but
  85. // at least two of them should be archive nodes.
  86. for _, c := range clients {
  87. result, err := c.Block(ctx, &height)
  88. if err != nil || result == nil || result.Block == nil {
  89. continue
  90. }
  91. return result.Block, &result.BlockID, err
  92. }
  93. }
  94. }
  95. if len(clients) == 0 {
  96. return nil, nil, errors.New("unable to connect to any network nodes")
  97. }
  98. if time.Since(lastIncrease) >= time.Minute {
  99. if lastHeight == 0 {
  100. return nil, nil, errors.New("chain stalled at unknown height (most likely upon starting)")
  101. }
  102. return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
  103. lastHeight,
  104. len(nodesAtHeight),
  105. numRunningNodes,
  106. nodesAtHeight)
  107. }
  108. timer.Reset(1 * time.Second)
  109. }
  110. }
  111. }
  112. // waitForNode waits for a node to become available and catch up to the given block height.
  113. func waitForNode(ctx context.Context, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
  114. // If the node is the light client or seed note, we do not check for the last height.
  115. // The light client and seed note can be behind the full node and validator
  116. if node.Mode == e2e.ModeSeed {
  117. return nil, nil
  118. }
  119. client, err := node.Client()
  120. if err != nil {
  121. return nil, err
  122. }
  123. timer := time.NewTimer(0)
  124. defer timer.Stop()
  125. var (
  126. lastFailed bool
  127. counter int
  128. )
  129. for {
  130. counter++
  131. if lastFailed {
  132. lastFailed = false
  133. // if there was a problem with the request in
  134. // the previous recreate the client to ensure
  135. // reconnection
  136. client, err = node.Client()
  137. if err != nil {
  138. return nil, err
  139. }
  140. }
  141. select {
  142. case <-ctx.Done():
  143. return nil, ctx.Err()
  144. case <-timer.C:
  145. status, err := client.Status(ctx)
  146. switch {
  147. case errors.Is(err, context.DeadlineExceeded):
  148. return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
  149. case errors.Is(err, context.Canceled):
  150. return nil, err
  151. // If the node is the light client, it is not essential to wait for it to catch up, but we must return status info
  152. case err == nil && node.Mode == e2e.ModeLight:
  153. return status, nil
  154. case err == nil && node.Mode != e2e.ModeLight && status.SyncInfo.LatestBlockHeight >= height:
  155. return status, nil
  156. case counter%500 == 0:
  157. switch {
  158. case err != nil:
  159. lastFailed = true
  160. logger.Error("node not yet ready",
  161. "iter", counter,
  162. "node", node.Name,
  163. "target", height,
  164. "err", err,
  165. )
  166. case status != nil:
  167. logger.Info("node not yet ready",
  168. "iter", counter,
  169. "node", node.Name,
  170. "height", status.SyncInfo.LatestBlockHeight,
  171. "target", height,
  172. )
  173. }
  174. }
  175. timer.Reset(250 * time.Millisecond)
  176. }
  177. }
  178. }
  179. // getLatestBlock returns the last block that all active nodes in the network have
  180. // agreed upon i.e. the earlist of each nodes latest block
  181. func getLatestBlock(ctx context.Context, testnet *e2e.Testnet) (*types.Block, error) {
  182. var earliestBlock *types.Block
  183. for _, node := range testnet.Nodes {
  184. // skip nodes that don't have state or haven't started yet
  185. if node.Stateless() {
  186. continue
  187. }
  188. if !node.HasStarted {
  189. continue
  190. }
  191. client, err := node.Client()
  192. if err != nil {
  193. return nil, err
  194. }
  195. wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
  196. defer cancel()
  197. result, err := client.Block(wctx, nil)
  198. if err != nil {
  199. return nil, err
  200. }
  201. if result.Block != nil && (earliestBlock == nil || earliestBlock.Height > result.Block.Height) {
  202. earliestBlock = result.Block
  203. }
  204. }
  205. return earliestBlock, nil
  206. }