You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

226 lines
5.5 KiB

  1. package main
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "time"
  7. rpchttp "github.com/tendermint/tendermint/rpc/client/http"
  8. rpctypes "github.com/tendermint/tendermint/rpc/coretypes"
  9. e2e "github.com/tendermint/tendermint/test/e2e/pkg"
  10. "github.com/tendermint/tendermint/types"
  11. )
  12. // waitForHeight waits for the network to reach a certain height (or above),
  13. // returning the block at the height seen. Errors if the network is not making
  14. // progress at all.
  15. // If height == 0, the initial height of the test network is used as the target.
  16. func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
  17. var (
  18. err error
  19. clients = map[string]*rpchttp.HTTP{}
  20. lastHeight int64
  21. lastIncrease = time.Now()
  22. nodesAtHeight = map[string]struct{}{}
  23. numRunningNodes int
  24. )
  25. if height == 0 {
  26. height = testnet.InitialHeight
  27. }
  28. for _, node := range testnet.Nodes {
  29. if node.Stateless() {
  30. continue
  31. }
  32. if node.HasStarted {
  33. numRunningNodes++
  34. }
  35. }
  36. timer := time.NewTimer(0)
  37. defer timer.Stop()
  38. for {
  39. select {
  40. case <-ctx.Done():
  41. return nil, nil, ctx.Err()
  42. case <-timer.C:
  43. for _, node := range testnet.Nodes {
  44. // skip nodes that have reached the target height
  45. if _, ok := nodesAtHeight[node.Name]; ok {
  46. continue
  47. }
  48. // skip nodes that don't have state or haven't started yet
  49. if node.Stateless() {
  50. continue
  51. }
  52. if !node.HasStarted {
  53. continue
  54. }
  55. // cache the clients
  56. client, ok := clients[node.Name]
  57. if !ok {
  58. client, err = node.Client()
  59. if err != nil {
  60. continue
  61. }
  62. clients[node.Name] = client
  63. }
  64. result, err := client.Status(ctx)
  65. if err != nil {
  66. continue
  67. }
  68. if result.SyncInfo.LatestBlockHeight > lastHeight {
  69. lastHeight = result.SyncInfo.LatestBlockHeight
  70. lastIncrease = time.Now()
  71. }
  72. if result.SyncInfo.LatestBlockHeight >= height {
  73. // the node has achieved the target height!
  74. // add this node to the set of target
  75. // height nodes
  76. nodesAtHeight[node.Name] = struct{}{}
  77. // if not all of the nodes that we
  78. // have clients for have reached the
  79. // target height, keep trying.
  80. if numRunningNodes > len(nodesAtHeight) {
  81. continue
  82. }
  83. // All nodes are at or above the target height. Now fetch the block for that target height
  84. // and return it. We loop again through all clients because some may have pruning set but
  85. // at least two of them should be archive nodes.
  86. for _, c := range clients {
  87. result, err := c.Block(ctx, &height)
  88. if err != nil || result == nil || result.Block == nil {
  89. continue
  90. }
  91. return result.Block, &result.BlockID, err
  92. }
  93. }
  94. }
  95. if len(clients) == 0 {
  96. return nil, nil, errors.New("unable to connect to any network nodes")
  97. }
  98. if time.Since(lastIncrease) >= time.Minute {
  99. if lastHeight == 0 {
  100. return nil, nil, errors.New("chain stalled at unknown height (most likely upon starting)")
  101. }
  102. return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
  103. lastHeight,
  104. len(nodesAtHeight),
  105. numRunningNodes,
  106. nodesAtHeight)
  107. }
  108. timer.Reset(1 * time.Second)
  109. }
  110. }
  111. }
  112. // waitForNode waits for a node to become available and catch up to the given block height.
  113. func waitForNode(ctx context.Context, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
  114. if node.Mode == e2e.ModeSeed {
  115. return nil, nil
  116. }
  117. client, err := node.Client()
  118. if err != nil {
  119. return nil, err
  120. }
  121. timer := time.NewTimer(0)
  122. defer timer.Stop()
  123. var (
  124. lastFailed bool
  125. counter int
  126. )
  127. for {
  128. counter++
  129. if lastFailed {
  130. lastFailed = false
  131. // if there was a problem with the request in
  132. // the previous recreate the client to ensure
  133. // reconnection
  134. client, err = node.Client()
  135. if err != nil {
  136. return nil, err
  137. }
  138. }
  139. select {
  140. case <-ctx.Done():
  141. return nil, ctx.Err()
  142. case <-timer.C:
  143. status, err := client.Status(ctx)
  144. switch {
  145. case errors.Is(err, context.DeadlineExceeded):
  146. return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
  147. case errors.Is(err, context.Canceled):
  148. return nil, err
  149. case err == nil && status.SyncInfo.LatestBlockHeight >= height:
  150. return status, nil
  151. case counter%500 == 0:
  152. switch {
  153. case err != nil:
  154. lastFailed = true
  155. logger.Error("node not yet ready",
  156. "iter", counter,
  157. "node", node.Name,
  158. "target", height,
  159. "err", err,
  160. )
  161. case status != nil:
  162. logger.Info("node not yet ready",
  163. "iter", counter,
  164. "node", node.Name,
  165. "height", status.SyncInfo.LatestBlockHeight,
  166. "target", height,
  167. )
  168. }
  169. }
  170. timer.Reset(250 * time.Millisecond)
  171. }
  172. }
  173. }
  174. // getLatestBlock returns the last block that all active nodes in the network have
  175. // agreed upon i.e. the earlist of each nodes latest block
  176. func getLatestBlock(ctx context.Context, testnet *e2e.Testnet) (*types.Block, error) {
  177. var earliestBlock *types.Block
  178. for _, node := range testnet.Nodes {
  179. // skip nodes that don't have state or haven't started yet
  180. if node.Stateless() {
  181. continue
  182. }
  183. if !node.HasStarted {
  184. continue
  185. }
  186. client, err := node.Client()
  187. if err != nil {
  188. return nil, err
  189. }
  190. wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
  191. defer cancel()
  192. result, err := client.Block(wctx, nil)
  193. if err != nil {
  194. return nil, err
  195. }
  196. if result.Block != nil && (earliestBlock == nil || earliestBlock.Height > result.Block.Height) {
  197. earliestBlock = result.Block
  198. }
  199. }
  200. return earliestBlock, nil
  201. }