You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

463 lines
15 KiB

  1. package statesync
  2. import (
  3. "bytes"
  4. "context"
  5. "fmt"
  6. "math/rand"
  7. "strings"
  8. "sync"
  9. "time"
  10. dbm "github.com/tendermint/tm-db"
  11. "github.com/tendermint/tendermint/internal/p2p"
  12. sm "github.com/tendermint/tendermint/internal/state"
  13. "github.com/tendermint/tendermint/libs/log"
  14. "github.com/tendermint/tendermint/light"
  15. lightprovider "github.com/tendermint/tendermint/light/provider"
  16. lighthttp "github.com/tendermint/tendermint/light/provider/http"
  17. lightrpc "github.com/tendermint/tendermint/light/rpc"
  18. lightdb "github.com/tendermint/tendermint/light/store/db"
  19. ssproto "github.com/tendermint/tendermint/proto/tendermint/statesync"
  20. rpchttp "github.com/tendermint/tendermint/rpc/client/http"
  21. "github.com/tendermint/tendermint/types"
  22. "github.com/tendermint/tendermint/version"
  23. )
  24. //go:generate ../../scripts/mockery_generate.sh StateProvider
  25. // StateProvider is a provider of trusted state data for bootstrapping a node. This refers
  26. // to the state.State object, not the state machine. There are two implementations. One
  27. // uses the P2P layer and the other uses the RPC layer. Both use light client verification.
  28. type StateProvider interface {
  29. // AppHash returns the app hash after the given height has been committed.
  30. AppHash(ctx context.Context, height uint64) ([]byte, error)
  31. // Commit returns the commit at the given height.
  32. Commit(ctx context.Context, height uint64) (*types.Commit, error)
  33. // State returns a state object at the given height.
  34. State(ctx context.Context, height uint64) (sm.State, error)
  35. }
  36. type stateProviderRPC struct {
  37. sync.Mutex // light.Client is not concurrency-safe
  38. lc *light.Client
  39. initialHeight int64
  40. providers map[lightprovider.Provider]string
  41. logger log.Logger
  42. }
  43. // NewRPCStateProvider creates a new StateProvider using a light client and RPC clients.
  44. func NewRPCStateProvider(
  45. ctx context.Context,
  46. chainID string,
  47. initialHeight int64,
  48. servers []string,
  49. trustOptions light.TrustOptions,
  50. logger log.Logger,
  51. ) (StateProvider, error) {
  52. if len(servers) < 2 {
  53. return nil, fmt.Errorf("at least 2 RPC servers are required, got %d", len(servers))
  54. }
  55. providers := make([]lightprovider.Provider, 0, len(servers))
  56. providerRemotes := make(map[lightprovider.Provider]string)
  57. for _, server := range servers {
  58. client, err := rpcClient(server)
  59. if err != nil {
  60. return nil, fmt.Errorf("failed to set up RPC client: %w", err)
  61. }
  62. provider := lighthttp.NewWithClient(chainID, client)
  63. providers = append(providers, provider)
  64. // We store the RPC addresses keyed by provider, so we can find the address of the primary
  65. // provider used by the light client and use it to fetch consensus parameters.
  66. providerRemotes[provider] = server
  67. }
  68. lc, err := light.NewClient(ctx, chainID, trustOptions, providers[0], providers[1:],
  69. lightdb.New(dbm.NewMemDB()), light.Logger(logger))
  70. if err != nil {
  71. return nil, err
  72. }
  73. return &stateProviderRPC{
  74. logger: logger,
  75. lc: lc,
  76. initialHeight: initialHeight,
  77. providers: providerRemotes,
  78. }, nil
  79. }
  80. func (s *stateProviderRPC) verifyLightBlockAtHeight(ctx context.Context, height uint64, ts time.Time) (*types.LightBlock, error) {
  81. ctx, cancel := context.WithTimeout(ctx, 20*time.Second)
  82. defer cancel()
  83. return s.lc.VerifyLightBlockAtHeight(ctx, int64(height), ts)
  84. }
  85. // AppHash implements part of StateProvider. It calls the application to verify the
  86. // light blocks at heights h+1 and h+2 and, if verification succeeds, reports the app
  87. // hash for the block at height h+1 which correlates to the state at height h.
  88. func (s *stateProviderRPC) AppHash(ctx context.Context, height uint64) ([]byte, error) {
  89. s.Lock()
  90. defer s.Unlock()
  91. // We have to fetch the next height, which contains the app hash for the previous height.
  92. header, err := s.verifyLightBlockAtHeight(ctx, height+1, time.Now())
  93. if err != nil {
  94. return nil, err
  95. }
  96. // We also try to fetch the blocks at H+2, since we need these
  97. // when building the state while restoring the snapshot. This avoids the race
  98. // condition where we try to restore a snapshot before H+2 exists.
  99. _, err = s.verifyLightBlockAtHeight(ctx, height+2, time.Now())
  100. if err != nil {
  101. return nil, err
  102. }
  103. return header.AppHash, nil
  104. }
  105. // Commit implements StateProvider.
  106. func (s *stateProviderRPC) Commit(ctx context.Context, height uint64) (*types.Commit, error) {
  107. s.Lock()
  108. defer s.Unlock()
  109. header, err := s.verifyLightBlockAtHeight(ctx, height, time.Now())
  110. if err != nil {
  111. return nil, err
  112. }
  113. return header.Commit, nil
  114. }
  115. // State implements StateProvider.
  116. func (s *stateProviderRPC) State(ctx context.Context, height uint64) (sm.State, error) {
  117. s.Lock()
  118. defer s.Unlock()
  119. state := sm.State{
  120. ChainID: s.lc.ChainID(),
  121. InitialHeight: s.initialHeight,
  122. }
  123. if state.InitialHeight == 0 {
  124. state.InitialHeight = 1
  125. }
  126. // The snapshot height maps onto the state heights as follows:
  127. //
  128. // height: last block, i.e. the snapshotted height
  129. // height+1: current block, i.e. the first block we'll process after the snapshot
  130. // height+2: next block, i.e. the second block after the snapshot
  131. //
  132. // We need to fetch the NextValidators from height+2 because if the application changed
  133. // the validator set at the snapshot height then this only takes effect at height+2.
  134. lastLightBlock, err := s.verifyLightBlockAtHeight(ctx, height, time.Now())
  135. if err != nil {
  136. return sm.State{}, err
  137. }
  138. currentLightBlock, err := s.verifyLightBlockAtHeight(ctx, height+1, time.Now())
  139. if err != nil {
  140. return sm.State{}, err
  141. }
  142. nextLightBlock, err := s.verifyLightBlockAtHeight(ctx, height+2, time.Now())
  143. if err != nil {
  144. return sm.State{}, err
  145. }
  146. state.Version = sm.Version{
  147. Consensus: currentLightBlock.Version,
  148. Software: version.TMVersion,
  149. }
  150. state.LastBlockHeight = lastLightBlock.Height
  151. state.LastBlockTime = lastLightBlock.Time
  152. state.LastBlockID = lastLightBlock.Commit.BlockID
  153. state.AppHash = currentLightBlock.AppHash
  154. state.LastResultsHash = currentLightBlock.LastResultsHash
  155. state.LastValidators = lastLightBlock.ValidatorSet
  156. state.Validators = currentLightBlock.ValidatorSet
  157. state.NextValidators = nextLightBlock.ValidatorSet
  158. state.LastHeightValidatorsChanged = nextLightBlock.Height
  159. // We'll also need to fetch consensus params via RPC, using light client verification.
  160. primaryURL, ok := s.providers[s.lc.Primary()]
  161. if !ok || primaryURL == "" {
  162. return sm.State{}, fmt.Errorf("could not find address for primary light client provider")
  163. }
  164. primaryRPC, err := rpcClient(primaryURL)
  165. if err != nil {
  166. return sm.State{}, fmt.Errorf("unable to create RPC client: %w", err)
  167. }
  168. rpcclient := lightrpc.NewClient(s.logger, primaryRPC, s.lc)
  169. result, err := rpcclient.ConsensusParams(ctx, &currentLightBlock.Height)
  170. if err != nil {
  171. return sm.State{}, fmt.Errorf("unable to fetch consensus parameters for height %v: %w",
  172. nextLightBlock.Height, err)
  173. }
  174. state.ConsensusParams = result.ConsensusParams
  175. state.LastHeightConsensusParamsChanged = currentLightBlock.Height
  176. return state, nil
  177. }
  178. // rpcClient sets up a new RPC client
  179. func rpcClient(server string) (*rpchttp.HTTP, error) {
  180. if !strings.Contains(server, "://") {
  181. server = "http://" + server
  182. }
  183. return rpchttp.New(server)
  184. }
  185. type stateProviderP2P struct {
  186. sync.Mutex // light.Client is not concurrency-safe
  187. lc *light.Client
  188. initialHeight int64
  189. paramsSendCh *p2p.Channel
  190. paramsRecvCh chan types.ConsensusParams
  191. }
  192. // NewP2PStateProvider creates a light client state
  193. // provider but uses a dispatcher connected to the P2P layer
  194. func NewP2PStateProvider(
  195. ctx context.Context,
  196. chainID string,
  197. initialHeight int64,
  198. providers []lightprovider.Provider,
  199. trustOptions light.TrustOptions,
  200. paramsSendCh *p2p.Channel,
  201. logger log.Logger,
  202. ) (StateProvider, error) {
  203. if len(providers) < 2 {
  204. return nil, fmt.Errorf("at least 2 peers are required, got %d", len(providers))
  205. }
  206. lc, err := light.NewClient(ctx, chainID, trustOptions, providers[0], providers[1:],
  207. lightdb.New(dbm.NewMemDB()), light.Logger(logger))
  208. if err != nil {
  209. return nil, err
  210. }
  211. return &stateProviderP2P{
  212. lc: lc,
  213. initialHeight: initialHeight,
  214. paramsSendCh: paramsSendCh,
  215. paramsRecvCh: make(chan types.ConsensusParams),
  216. }, nil
  217. }
  218. func (s *stateProviderP2P) verifyLightBlockAtHeight(ctx context.Context, height uint64, ts time.Time) (*types.LightBlock, error) {
  219. ctx, cancel := context.WithTimeout(ctx, 20*time.Second)
  220. defer cancel()
  221. return s.lc.VerifyLightBlockAtHeight(ctx, int64(height), ts)
  222. }
  223. // AppHash implements StateProvider.
  224. func (s *stateProviderP2P) AppHash(ctx context.Context, height uint64) ([]byte, error) {
  225. s.Lock()
  226. defer s.Unlock()
  227. // We have to fetch the next height, which contains the app hash for the previous height.
  228. header, err := s.verifyLightBlockAtHeight(ctx, height+1, time.Now())
  229. if err != nil {
  230. return nil, err
  231. }
  232. // We also try to fetch the blocks at H+2, since we need these
  233. // when building the state while restoring the snapshot. This avoids the race
  234. // condition where we try to restore a snapshot before H+2 exists.
  235. _, err = s.verifyLightBlockAtHeight(ctx, height+2, time.Now())
  236. if err != nil {
  237. return nil, err
  238. }
  239. return header.AppHash, nil
  240. }
  241. // Commit implements StateProvider.
  242. func (s *stateProviderP2P) Commit(ctx context.Context, height uint64) (*types.Commit, error) {
  243. s.Lock()
  244. defer s.Unlock()
  245. header, err := s.verifyLightBlockAtHeight(ctx, height, time.Now())
  246. if err != nil {
  247. return nil, err
  248. }
  249. return header.Commit, nil
  250. }
  251. // State implements StateProvider.
  252. func (s *stateProviderP2P) State(ctx context.Context, height uint64) (sm.State, error) {
  253. s.Lock()
  254. defer s.Unlock()
  255. state := sm.State{
  256. ChainID: s.lc.ChainID(),
  257. InitialHeight: s.initialHeight,
  258. }
  259. if state.InitialHeight == 0 {
  260. state.InitialHeight = 1
  261. }
  262. // The snapshot height maps onto the state heights as follows:
  263. //
  264. // height: last block, i.e. the snapshotted height
  265. // height+1: current block, i.e. the first block we'll process after the snapshot
  266. // height+2: next block, i.e. the second block after the snapshot
  267. //
  268. // We need to fetch the NextValidators from height+2 because if the application changed
  269. // the validator set at the snapshot height then this only takes effect at height+2.
  270. lastLightBlock, err := s.verifyLightBlockAtHeight(ctx, height, time.Now())
  271. if err != nil {
  272. return sm.State{}, err
  273. }
  274. currentLightBlock, err := s.verifyLightBlockAtHeight(ctx, height+1, time.Now())
  275. if err != nil {
  276. return sm.State{}, err
  277. }
  278. nextLightBlock, err := s.verifyLightBlockAtHeight(ctx, height+2, time.Now())
  279. if err != nil {
  280. return sm.State{}, err
  281. }
  282. state.Version = sm.Version{
  283. Consensus: currentLightBlock.Version,
  284. Software: version.TMVersion,
  285. }
  286. state.LastBlockHeight = lastLightBlock.Height
  287. state.LastBlockTime = lastLightBlock.Time
  288. state.LastBlockID = lastLightBlock.Commit.BlockID
  289. state.AppHash = currentLightBlock.AppHash
  290. state.LastResultsHash = currentLightBlock.LastResultsHash
  291. state.LastValidators = lastLightBlock.ValidatorSet
  292. state.Validators = currentLightBlock.ValidatorSet
  293. state.NextValidators = nextLightBlock.ValidatorSet
  294. state.LastHeightValidatorsChanged = nextLightBlock.Height
  295. // We'll also need to fetch consensus params via P2P.
  296. state.ConsensusParams, err = s.consensusParams(ctx, currentLightBlock.Height)
  297. if err != nil {
  298. return sm.State{}, fmt.Errorf("fetching consensus params: %w", err)
  299. }
  300. // validate the consensus params
  301. if !bytes.Equal(nextLightBlock.ConsensusHash, state.ConsensusParams.HashConsensusParams()) {
  302. return sm.State{}, fmt.Errorf("consensus params hash mismatch at height %d. Expected %v, got %v",
  303. currentLightBlock.Height, nextLightBlock.ConsensusHash, state.ConsensusParams.HashConsensusParams())
  304. }
  305. // set the last height changed to the current height
  306. state.LastHeightConsensusParamsChanged = currentLightBlock.Height
  307. return state, nil
  308. }
  309. // addProvider dynamically adds a peer as a new witness. A limit of 6 providers is kept as a
  310. // heuristic. Too many overburdens the network and too little compromises the second layer of security.
  311. func (s *stateProviderP2P) addProvider(p lightprovider.Provider) {
  312. if len(s.lc.Witnesses()) < 6 {
  313. s.lc.AddProvider(p)
  314. }
  315. }
  316. // consensusParams sends out a request for consensus params blocking
  317. // until one is returned.
  318. //
  319. // It attempts to send requests to all witnesses in parallel, but if
  320. // none responds it will retry them all sometime later until it
  321. // receives some response. This operation will block until it receives
  322. // a response or the context is canceled.
  323. func (s *stateProviderP2P) consensusParams(ctx context.Context, height int64) (types.ConsensusParams, error) {
  324. ctx, cancel := context.WithCancel(ctx)
  325. defer cancel()
  326. out := make(chan types.ConsensusParams)
  327. retryAll := func() (<-chan struct{}, error) {
  328. wg := &sync.WaitGroup{}
  329. for _, provider := range s.lc.Witnesses() {
  330. p, ok := provider.(*BlockProvider)
  331. if !ok {
  332. return nil, fmt.Errorf("witness is not BlockProvider [%T]", provider)
  333. }
  334. peer, err := types.NewNodeID(p.String())
  335. if err != nil {
  336. return nil, fmt.Errorf("invalid provider (%s) node id: %w", p.String(), err)
  337. }
  338. wg.Add(1)
  339. go func(p *BlockProvider, peer types.NodeID) {
  340. defer wg.Done()
  341. timer := time.NewTimer(0)
  342. defer timer.Stop()
  343. var iterCount int64
  344. for {
  345. iterCount++
  346. if err := s.paramsSendCh.Send(ctx, p2p.Envelope{
  347. To: peer,
  348. Message: &ssproto.ParamsRequest{
  349. Height: uint64(height),
  350. },
  351. }); err != nil {
  352. // this only errors if
  353. // the context is
  354. // canceled which we
  355. // don't need to
  356. // propagate here
  357. return
  358. }
  359. // jitter+backoff the retry loop
  360. timer.Reset(time.Duration(iterCount)*consensusParamsResponseTimeout +
  361. time.Duration(100*rand.Int63n(iterCount))*time.Millisecond) // nolint:gosec
  362. select {
  363. case <-timer.C:
  364. continue
  365. case <-ctx.Done():
  366. return
  367. case params, ok := <-s.paramsRecvCh:
  368. if !ok {
  369. return
  370. }
  371. select {
  372. case <-ctx.Done():
  373. return
  374. case out <- params:
  375. return
  376. }
  377. }
  378. }
  379. }(p, peer)
  380. }
  381. sig := make(chan struct{})
  382. go func() { wg.Wait(); close(sig) }()
  383. return sig, nil
  384. }
  385. timer := time.NewTimer(0)
  386. defer timer.Stop()
  387. var iterCount int64
  388. for {
  389. iterCount++
  390. sig, err := retryAll()
  391. if err != nil {
  392. return types.ConsensusParams{}, err
  393. }
  394. select {
  395. case <-sig:
  396. // jitter+backoff the retry loop
  397. timer.Reset(time.Duration(iterCount)*consensusParamsResponseTimeout +
  398. time.Duration(100*rand.Int63n(iterCount))*time.Millisecond) // nolint:gosec
  399. select {
  400. case param := <-out:
  401. return param, nil
  402. case <-ctx.Done():
  403. return types.ConsensusParams{}, ctx.Err()
  404. case <-timer.C:
  405. }
  406. case <-ctx.Done():
  407. return types.ConsensusParams{}, ctx.Err()
  408. case param := <-out:
  409. return param, nil
  410. }
  411. }
  412. }