You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

184 lines
4.3 KiB

  1. package main
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "time"
  7. rpchttp "github.com/tendermint/tendermint/rpc/client/http"
  8. rpctypes "github.com/tendermint/tendermint/rpc/core/types"
  9. e2e "github.com/tendermint/tendermint/test/e2e/pkg"
  10. "github.com/tendermint/tendermint/types"
  11. )
  12. // waitForHeight waits for the network to reach a certain height (or above),
  13. // returning the highest height seen. Errors if the network is not making
  14. // progress at all.
  15. func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
  16. var (
  17. err error
  18. maxResult *rpctypes.ResultBlock
  19. clients = map[string]*rpchttp.HTTP{}
  20. lastIncrease = time.Now()
  21. nodesAtHeight = map[string]struct{}{}
  22. numRunningNodes int
  23. )
  24. for _, node := range testnet.Nodes {
  25. if node.Stateless() {
  26. continue
  27. }
  28. if node.HasStarted {
  29. numRunningNodes++
  30. }
  31. }
  32. timer := time.NewTimer(0)
  33. defer timer.Stop()
  34. for {
  35. select {
  36. case <-ctx.Done():
  37. return nil, nil, ctx.Err()
  38. case <-timer.C:
  39. for _, node := range testnet.Nodes {
  40. // skip nodes that have reached the target height
  41. if _, ok := nodesAtHeight[node.Name]; ok {
  42. continue
  43. }
  44. if node.Stateless() {
  45. continue
  46. }
  47. if !node.HasStarted {
  48. continue
  49. }
  50. // cache the clients
  51. client, ok := clients[node.Name]
  52. if !ok {
  53. client, err = node.Client()
  54. if err != nil {
  55. continue
  56. }
  57. clients[node.Name] = client
  58. }
  59. wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
  60. defer cancel()
  61. result, err := client.Block(wctx, nil)
  62. if err != nil {
  63. continue
  64. }
  65. if result.Block != nil && (maxResult == nil || result.Block.Height > maxResult.Block.Height) {
  66. maxResult = result
  67. lastIncrease = time.Now()
  68. }
  69. if maxResult != nil && maxResult.Block.Height >= height {
  70. // the node has achieved the target height!
  71. // add this node to the set of target
  72. // height nodes
  73. nodesAtHeight[node.Name] = struct{}{}
  74. // if not all of the nodes that we
  75. // have clients for have reached the
  76. // target height, keep trying.
  77. if numRunningNodes > len(nodesAtHeight) {
  78. continue
  79. }
  80. // return once all nodes have reached
  81. // the target height.
  82. return maxResult.Block, &maxResult.BlockID, nil
  83. }
  84. }
  85. if len(clients) == 0 {
  86. return nil, nil, errors.New("unable to connect to any network nodes")
  87. }
  88. if time.Since(lastIncrease) >= time.Minute {
  89. if maxResult == nil {
  90. return nil, nil, errors.New("chain stalled at unknown height")
  91. }
  92. return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
  93. maxResult.Block.Height,
  94. len(nodesAtHeight),
  95. numRunningNodes,
  96. nodesAtHeight)
  97. }
  98. timer.Reset(1 * time.Second)
  99. }
  100. }
  101. }
  102. // waitForNode waits for a node to become available and catch up to the given block height.
  103. func waitForNode(ctx context.Context, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
  104. if node.Mode == e2e.ModeSeed {
  105. return nil, nil
  106. }
  107. client, err := node.Client()
  108. if err != nil {
  109. return nil, err
  110. }
  111. timer := time.NewTimer(0)
  112. defer timer.Stop()
  113. var (
  114. lastFailed bool
  115. counter int
  116. )
  117. for {
  118. counter++
  119. if lastFailed {
  120. lastFailed = false
  121. // if there was a problem with the request in
  122. // the previous recreate the client to ensure
  123. // reconnection
  124. client, err = node.Client()
  125. if err != nil {
  126. return nil, err
  127. }
  128. }
  129. select {
  130. case <-ctx.Done():
  131. return nil, ctx.Err()
  132. case <-timer.C:
  133. status, err := client.Status(ctx)
  134. switch {
  135. case errors.Is(err, context.DeadlineExceeded):
  136. return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
  137. case errors.Is(err, context.Canceled):
  138. return nil, err
  139. case err == nil && status.SyncInfo.LatestBlockHeight >= height:
  140. return status, nil
  141. case counter%100 == 0:
  142. switch {
  143. case err != nil:
  144. lastFailed = true
  145. logger.Error("node not yet ready",
  146. "iter", counter,
  147. "node", node.Name,
  148. "err", err,
  149. "target", height,
  150. )
  151. case status != nil:
  152. logger.Error("node not yet ready",
  153. "iter", counter,
  154. "node", node.Name,
  155. "height", status.SyncInfo.LatestBlockHeight,
  156. "target", height,
  157. )
  158. }
  159. }
  160. timer.Reset(250 * time.Millisecond)
  161. }
  162. }
  163. }