You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

228 lines
5.6 KiB

  1. package main
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "time"
  7. rpchttp "github.com/tendermint/tendermint/rpc/client/http"
  8. rpctypes "github.com/tendermint/tendermint/rpc/core/types"
  9. e2e "github.com/tendermint/tendermint/test/e2e/pkg"
  10. "github.com/tendermint/tendermint/types"
  11. )
  12. // waitForHeight waits for the network to reach a certain height (or above),
  13. // returning the block at the height seen. Errors if the network is not making
  14. // progress at all.
  15. // If height == 0, the initial height of the test network is used as the target.
  16. func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
  17. var (
  18. err error
  19. clients = map[string]*rpchttp.HTTP{}
  20. lastHeight int64
  21. lastIncrease = time.Now()
  22. nodesAtHeight = map[string]struct{}{}
  23. numRunningNodes int
  24. )
  25. if height == 0 {
  26. height = testnet.InitialHeight
  27. }
  28. for _, node := range testnet.Nodes {
  29. if node.Stateless() {
  30. continue
  31. }
  32. if node.HasStarted {
  33. numRunningNodes++
  34. }
  35. }
  36. timer := time.NewTimer(0)
  37. defer timer.Stop()
  38. for {
  39. select {
  40. case <-ctx.Done():
  41. return nil, nil, ctx.Err()
  42. case <-timer.C:
  43. for _, node := range testnet.Nodes {
  44. // skip nodes that have reached the target height
  45. if _, ok := nodesAtHeight[node.Name]; ok {
  46. continue
  47. }
  48. // skip nodes that don't have state or haven't started yet
  49. if node.Stateless() {
  50. continue
  51. }
  52. if !node.HasStarted {
  53. continue
  54. }
  55. // cache the clients
  56. client, ok := clients[node.Name]
  57. if !ok {
  58. client, err = node.Client()
  59. if err != nil {
  60. continue
  61. }
  62. clients[node.Name] = client
  63. }
  64. wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
  65. defer cancel()
  66. result, err := client.Status(wctx)
  67. if err != nil {
  68. continue
  69. }
  70. if result.SyncInfo.LatestBlockHeight > lastHeight {
  71. lastHeight = result.SyncInfo.LatestBlockHeight
  72. lastIncrease = time.Now()
  73. }
  74. if result.SyncInfo.LatestBlockHeight >= height {
  75. // the node has achieved the target height!
  76. // add this node to the set of target
  77. // height nodes
  78. nodesAtHeight[node.Name] = struct{}{}
  79. // if not all of the nodes that we
  80. // have clients for have reached the
  81. // target height, keep trying.
  82. if numRunningNodes > len(nodesAtHeight) {
  83. continue
  84. }
  85. // All nodes are at or above the target height. Now fetch the block for that target height
  86. // and return it. We loop again through all clients because some may have pruning set but
  87. // at least two of them should be archive nodes.
  88. for _, c := range clients {
  89. result, err := c.Block(ctx, &height)
  90. if err != nil || result == nil || result.Block == nil {
  91. continue
  92. }
  93. return result.Block, &result.BlockID, err
  94. }
  95. }
  96. }
  97. if len(clients) == 0 {
  98. return nil, nil, errors.New("unable to connect to any network nodes")
  99. }
  100. if time.Since(lastIncrease) >= time.Minute {
  101. if lastHeight == 0 {
  102. return nil, nil, errors.New("chain stalled at unknown height (most likely upon starting)")
  103. }
  104. return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
  105. lastHeight,
  106. len(nodesAtHeight),
  107. numRunningNodes,
  108. nodesAtHeight)
  109. }
  110. timer.Reset(1 * time.Second)
  111. }
  112. }
  113. }
  114. // waitForNode waits for a node to become available and catch up to the given block height.
  115. func waitForNode(ctx context.Context, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
  116. if node.Mode == e2e.ModeSeed {
  117. return nil, nil
  118. }
  119. client, err := node.Client()
  120. if err != nil {
  121. return nil, err
  122. }
  123. timer := time.NewTimer(0)
  124. defer timer.Stop()
  125. var (
  126. lastFailed bool
  127. counter int
  128. )
  129. for {
  130. counter++
  131. if lastFailed {
  132. lastFailed = false
  133. // if there was a problem with the request in
  134. // the previous recreate the client to ensure
  135. // reconnection
  136. client, err = node.Client()
  137. if err != nil {
  138. return nil, err
  139. }
  140. }
  141. select {
  142. case <-ctx.Done():
  143. return nil, ctx.Err()
  144. case <-timer.C:
  145. status, err := client.Status(ctx)
  146. switch {
  147. case errors.Is(err, context.DeadlineExceeded):
  148. return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
  149. case errors.Is(err, context.Canceled):
  150. return nil, err
  151. case err == nil && status.SyncInfo.LatestBlockHeight >= height:
  152. return status, nil
  153. case counter%100 == 0:
  154. switch {
  155. case err != nil:
  156. lastFailed = true
  157. logger.Error("node not yet ready",
  158. "iter", counter,
  159. "node", node.Name,
  160. "err", err,
  161. "target", height,
  162. )
  163. case status != nil:
  164. logger.Error("node not yet ready",
  165. "iter", counter,
  166. "node", node.Name,
  167. "height", status.SyncInfo.LatestBlockHeight,
  168. "target", height,
  169. )
  170. }
  171. }
  172. timer.Reset(250 * time.Millisecond)
  173. }
  174. }
  175. }
  176. // getLatestBlock returns the last block that all active nodes in the network have
  177. // agreed upon i.e. the earlist of each nodes latest block
  178. func getLatestBlock(ctx context.Context, testnet *e2e.Testnet) (*types.Block, error) {
  179. var earliestBlock *types.Block
  180. for _, node := range testnet.Nodes {
  181. // skip nodes that don't have state or haven't started yet
  182. if node.Stateless() {
  183. continue
  184. }
  185. if !node.HasStarted {
  186. continue
  187. }
  188. client, err := node.Client()
  189. if err != nil {
  190. return nil, err
  191. }
  192. wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
  193. defer cancel()
  194. result, err := client.Block(wctx, nil)
  195. if err != nil {
  196. return nil, err
  197. }
  198. if result.Block != nil && (earliestBlock == nil || earliestBlock.Height > result.Block.Height) {
  199. earliestBlock = result.Block
  200. }
  201. }
  202. return earliestBlock, nil
  203. }