You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

412 lines
13 KiB

  1. package statesync
  2. import (
  3. "bytes"
  4. "context"
  5. "errors"
  6. "fmt"
  7. "strings"
  8. "sync"
  9. "time"
  10. dbm "github.com/tendermint/tm-db"
  11. "github.com/tendermint/tendermint/internal/p2p"
  12. sm "github.com/tendermint/tendermint/internal/state"
  13. "github.com/tendermint/tendermint/libs/log"
  14. "github.com/tendermint/tendermint/light"
  15. lightprovider "github.com/tendermint/tendermint/light/provider"
  16. lighthttp "github.com/tendermint/tendermint/light/provider/http"
  17. lightrpc "github.com/tendermint/tendermint/light/rpc"
  18. lightdb "github.com/tendermint/tendermint/light/store/db"
  19. ssproto "github.com/tendermint/tendermint/proto/tendermint/statesync"
  20. rpchttp "github.com/tendermint/tendermint/rpc/client/http"
  21. "github.com/tendermint/tendermint/types"
  22. "github.com/tendermint/tendermint/version"
  23. )
  24. //go:generate ../../scripts/mockery_generate.sh StateProvider
  25. // StateProvider is a provider of trusted state data for bootstrapping a node. This refers
  26. // to the state.State object, not the state machine. There are two implementations. One
  27. // uses the P2P layer and the other uses the RPC layer. Both use light client verification.
  28. type StateProvider interface {
  29. // AppHash returns the app hash after the given height has been committed.
  30. AppHash(ctx context.Context, height uint64) ([]byte, error)
  31. // Commit returns the commit at the given height.
  32. Commit(ctx context.Context, height uint64) (*types.Commit, error)
  33. // State returns a state object at the given height.
  34. State(ctx context.Context, height uint64) (sm.State, error)
  35. }
  36. type stateProviderRPC struct {
  37. sync.Mutex // light.Client is not concurrency-safe
  38. lc *light.Client
  39. initialHeight int64
  40. providers map[lightprovider.Provider]string
  41. logger log.Logger
  42. }
  43. // NewRPCStateProvider creates a new StateProvider using a light client and RPC clients.
  44. func NewRPCStateProvider(
  45. ctx context.Context,
  46. chainID string,
  47. initialHeight int64,
  48. servers []string,
  49. trustOptions light.TrustOptions,
  50. logger log.Logger,
  51. ) (StateProvider, error) {
  52. if len(servers) < 2 {
  53. return nil, fmt.Errorf("at least 2 RPC servers are required, got %d", len(servers))
  54. }
  55. providers := make([]lightprovider.Provider, 0, len(servers))
  56. providerRemotes := make(map[lightprovider.Provider]string)
  57. for _, server := range servers {
  58. client, err := rpcClient(server)
  59. if err != nil {
  60. return nil, fmt.Errorf("failed to set up RPC client: %w", err)
  61. }
  62. provider := lighthttp.NewWithClient(chainID, client)
  63. providers = append(providers, provider)
  64. // We store the RPC addresses keyed by provider, so we can find the address of the primary
  65. // provider used by the light client and use it to fetch consensus parameters.
  66. providerRemotes[provider] = server
  67. }
  68. lc, err := light.NewClient(ctx, chainID, trustOptions, providers[0], providers[1:],
  69. lightdb.New(dbm.NewMemDB()), light.Logger(logger))
  70. if err != nil {
  71. return nil, err
  72. }
  73. return &stateProviderRPC{
  74. logger: logger,
  75. lc: lc,
  76. initialHeight: initialHeight,
  77. providers: providerRemotes,
  78. }, nil
  79. }
  80. // AppHash implements part of StateProvider. It calls the application to verify the
  81. // light blocks at heights h+1 and h+2 and, if verification succeeds, reports the app
  82. // hash for the block at height h+1 which correlates to the state at height h.
  83. func (s *stateProviderRPC) AppHash(ctx context.Context, height uint64) ([]byte, error) {
  84. s.Lock()
  85. defer s.Unlock()
  86. // We have to fetch the next height, which contains the app hash for the previous height.
  87. header, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height+1), time.Now())
  88. if err != nil {
  89. return nil, err
  90. }
  91. // We also try to fetch the blocks at H+2, since we need these
  92. // when building the state while restoring the snapshot. This avoids the race
  93. // condition where we try to restore a snapshot before H+2 exists.
  94. _, err = s.lc.VerifyLightBlockAtHeight(ctx, int64(height+2), time.Now())
  95. if err != nil {
  96. return nil, err
  97. }
  98. return header.AppHash, nil
  99. }
  100. // Commit implements StateProvider.
  101. func (s *stateProviderRPC) Commit(ctx context.Context, height uint64) (*types.Commit, error) {
  102. s.Lock()
  103. defer s.Unlock()
  104. header, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height), time.Now())
  105. if err != nil {
  106. return nil, err
  107. }
  108. return header.Commit, nil
  109. }
  110. // State implements StateProvider.
  111. func (s *stateProviderRPC) State(ctx context.Context, height uint64) (sm.State, error) {
  112. s.Lock()
  113. defer s.Unlock()
  114. state := sm.State{
  115. ChainID: s.lc.ChainID(),
  116. InitialHeight: s.initialHeight,
  117. }
  118. if state.InitialHeight == 0 {
  119. state.InitialHeight = 1
  120. }
  121. // The snapshot height maps onto the state heights as follows:
  122. //
  123. // height: last block, i.e. the snapshotted height
  124. // height+1: current block, i.e. the first block we'll process after the snapshot
  125. // height+2: next block, i.e. the second block after the snapshot
  126. //
  127. // We need to fetch the NextValidators from height+2 because if the application changed
  128. // the validator set at the snapshot height then this only takes effect at height+2.
  129. lastLightBlock, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height), time.Now())
  130. if err != nil {
  131. return sm.State{}, err
  132. }
  133. currentLightBlock, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height+1), time.Now())
  134. if err != nil {
  135. return sm.State{}, err
  136. }
  137. nextLightBlock, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height+2), time.Now())
  138. if err != nil {
  139. return sm.State{}, err
  140. }
  141. state.Version = sm.Version{
  142. Consensus: currentLightBlock.Version,
  143. Software: version.TMVersion,
  144. }
  145. state.LastBlockHeight = lastLightBlock.Height
  146. state.LastBlockTime = lastLightBlock.Time
  147. state.LastBlockID = lastLightBlock.Commit.BlockID
  148. state.AppHash = currentLightBlock.AppHash
  149. state.LastResultsHash = currentLightBlock.LastResultsHash
  150. state.LastValidators = lastLightBlock.ValidatorSet
  151. state.Validators = currentLightBlock.ValidatorSet
  152. state.NextValidators = nextLightBlock.ValidatorSet
  153. state.LastHeightValidatorsChanged = nextLightBlock.Height
  154. // We'll also need to fetch consensus params via RPC, using light client verification.
  155. primaryURL, ok := s.providers[s.lc.Primary()]
  156. if !ok || primaryURL == "" {
  157. return sm.State{}, fmt.Errorf("could not find address for primary light client provider")
  158. }
  159. primaryRPC, err := rpcClient(primaryURL)
  160. if err != nil {
  161. return sm.State{}, fmt.Errorf("unable to create RPC client: %w", err)
  162. }
  163. rpcclient := lightrpc.NewClient(s.logger, primaryRPC, s.lc)
  164. result, err := rpcclient.ConsensusParams(ctx, &currentLightBlock.Height)
  165. if err != nil {
  166. return sm.State{}, fmt.Errorf("unable to fetch consensus parameters for height %v: %w",
  167. nextLightBlock.Height, err)
  168. }
  169. state.ConsensusParams = result.ConsensusParams
  170. state.LastHeightConsensusParamsChanged = currentLightBlock.Height
  171. return state, nil
  172. }
  173. // rpcClient sets up a new RPC client
  174. func rpcClient(server string) (*rpchttp.HTTP, error) {
  175. if !strings.Contains(server, "://") {
  176. server = "http://" + server
  177. }
  178. return rpchttp.New(server)
  179. }
  180. type stateProviderP2P struct {
  181. sync.Mutex // light.Client is not concurrency-safe
  182. lc *light.Client
  183. initialHeight int64
  184. paramsSendCh *p2p.Channel
  185. paramsRecvCh chan types.ConsensusParams
  186. }
  187. // NewP2PStateProvider creates a light client state
  188. // provider but uses a dispatcher connected to the P2P layer
  189. func NewP2PStateProvider(
  190. ctx context.Context,
  191. chainID string,
  192. initialHeight int64,
  193. providers []lightprovider.Provider,
  194. trustOptions light.TrustOptions,
  195. paramsSendCh *p2p.Channel,
  196. logger log.Logger,
  197. ) (StateProvider, error) {
  198. if len(providers) < 2 {
  199. return nil, fmt.Errorf("at least 2 peers are required, got %d", len(providers))
  200. }
  201. lc, err := light.NewClient(ctx, chainID, trustOptions, providers[0], providers[1:],
  202. lightdb.New(dbm.NewMemDB()), light.Logger(logger))
  203. if err != nil {
  204. return nil, err
  205. }
  206. return &stateProviderP2P{
  207. lc: lc,
  208. initialHeight: initialHeight,
  209. paramsSendCh: paramsSendCh,
  210. paramsRecvCh: make(chan types.ConsensusParams),
  211. }, nil
  212. }
  213. // AppHash implements StateProvider.
  214. func (s *stateProviderP2P) AppHash(ctx context.Context, height uint64) ([]byte, error) {
  215. s.Lock()
  216. defer s.Unlock()
  217. // We have to fetch the next height, which contains the app hash for the previous height.
  218. header, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height+1), time.Now())
  219. if err != nil {
  220. return nil, err
  221. }
  222. // We also try to fetch the blocks at H+2, since we need these
  223. // when building the state while restoring the snapshot. This avoids the race
  224. // condition where we try to restore a snapshot before H+2 exists.
  225. _, err = s.lc.VerifyLightBlockAtHeight(ctx, int64(height+2), time.Now())
  226. if err != nil {
  227. return nil, err
  228. }
  229. return header.AppHash, nil
  230. }
  231. // Commit implements StateProvider.
  232. func (s *stateProviderP2P) Commit(ctx context.Context, height uint64) (*types.Commit, error) {
  233. s.Lock()
  234. defer s.Unlock()
  235. header, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height), time.Now())
  236. if err != nil {
  237. return nil, err
  238. }
  239. return header.Commit, nil
  240. }
  241. // State implements StateProvider.
  242. func (s *stateProviderP2P) State(ctx context.Context, height uint64) (sm.State, error) {
  243. s.Lock()
  244. defer s.Unlock()
  245. state := sm.State{
  246. ChainID: s.lc.ChainID(),
  247. InitialHeight: s.initialHeight,
  248. }
  249. if state.InitialHeight == 0 {
  250. state.InitialHeight = 1
  251. }
  252. // The snapshot height maps onto the state heights as follows:
  253. //
  254. // height: last block, i.e. the snapshotted height
  255. // height+1: current block, i.e. the first block we'll process after the snapshot
  256. // height+2: next block, i.e. the second block after the snapshot
  257. //
  258. // We need to fetch the NextValidators from height+2 because if the application changed
  259. // the validator set at the snapshot height then this only takes effect at height+2.
  260. lastLightBlock, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height), time.Now())
  261. if err != nil {
  262. return sm.State{}, err
  263. }
  264. currentLightBlock, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height+1), time.Now())
  265. if err != nil {
  266. return sm.State{}, err
  267. }
  268. nextLightBlock, err := s.lc.VerifyLightBlockAtHeight(ctx, int64(height+2), time.Now())
  269. if err != nil {
  270. return sm.State{}, err
  271. }
  272. state.Version = sm.Version{
  273. Consensus: currentLightBlock.Version,
  274. Software: version.TMVersion,
  275. }
  276. state.LastBlockHeight = lastLightBlock.Height
  277. state.LastBlockTime = lastLightBlock.Time
  278. state.LastBlockID = lastLightBlock.Commit.BlockID
  279. state.AppHash = currentLightBlock.AppHash
  280. state.LastResultsHash = currentLightBlock.LastResultsHash
  281. state.LastValidators = lastLightBlock.ValidatorSet
  282. state.Validators = currentLightBlock.ValidatorSet
  283. state.NextValidators = nextLightBlock.ValidatorSet
  284. state.LastHeightValidatorsChanged = nextLightBlock.Height
  285. // We'll also need to fetch consensus params via P2P.
  286. state.ConsensusParams, err = s.consensusParams(ctx, currentLightBlock.Height)
  287. if err != nil {
  288. return sm.State{}, err
  289. }
  290. // validate the consensus params
  291. if !bytes.Equal(nextLightBlock.ConsensusHash, state.ConsensusParams.HashConsensusParams()) {
  292. return sm.State{}, fmt.Errorf("consensus params hash mismatch at height %d. Expected %v, got %v",
  293. currentLightBlock.Height, nextLightBlock.ConsensusHash, state.ConsensusParams.HashConsensusParams())
  294. }
  295. // set the last height changed to the current height
  296. state.LastHeightConsensusParamsChanged = currentLightBlock.Height
  297. return state, nil
  298. }
  299. // addProvider dynamically adds a peer as a new witness. A limit of 6 providers is kept as a
  300. // heuristic. Too many overburdens the network and too little compromises the second layer of security.
  301. func (s *stateProviderP2P) addProvider(p lightprovider.Provider) {
  302. if len(s.lc.Witnesses()) < 6 {
  303. s.lc.AddProvider(p)
  304. }
  305. }
  306. // consensusParams sends out a request for consensus params blocking
  307. // until one is returned.
  308. //
  309. // If it fails to get a valid set of consensus params from any of the
  310. // providers it returns an error; however, it will retry indefinitely
  311. // (with backoff) until the context is canceled.
  312. func (s *stateProviderP2P) consensusParams(ctx context.Context, height int64) (types.ConsensusParams, error) {
  313. iterCount := 0
  314. for {
  315. params, err := s.tryGetConsensusParamsFromWitnesses(ctx, height)
  316. if err != nil {
  317. return types.ConsensusParams{}, err
  318. }
  319. if params != nil {
  320. return *params, nil
  321. }
  322. iterCount++
  323. select {
  324. case <-ctx.Done():
  325. return types.ConsensusParams{}, ctx.Err()
  326. case <-time.After(time.Duration(iterCount) * consensusParamsResponseTimeout):
  327. }
  328. }
  329. }
  330. // tryGetConsensusParamsFromWitnesses attempts to get consensus
  331. // parameters from the light clients available witnesses. If both
  332. // return parameters are nil, then it can be retried.
  333. func (s *stateProviderP2P) tryGetConsensusParamsFromWitnesses(
  334. ctx context.Context,
  335. height int64,
  336. ) (*types.ConsensusParams, error) {
  337. for _, provider := range s.lc.Witnesses() {
  338. p, ok := provider.(*BlockProvider)
  339. if !ok {
  340. panic("expected p2p state provider to use p2p block providers")
  341. }
  342. // extract the nodeID of the provider
  343. peer, err := types.NewNodeID(p.String())
  344. if err != nil {
  345. return nil, fmt.Errorf("invalid provider (%s) node id: %w", p.String(), err)
  346. }
  347. if err := s.paramsSendCh.Send(ctx, p2p.Envelope{
  348. To: peer,
  349. Message: &ssproto.ParamsRequest{
  350. Height: uint64(height),
  351. },
  352. }); err != nil {
  353. return nil, err
  354. }
  355. select {
  356. // if we get no response from this provider we move on to the next one
  357. case <-time.After(consensusParamsResponseTimeout):
  358. continue
  359. case <-ctx.Done():
  360. return nil, ctx.Err()
  361. case params, ok := <-s.paramsRecvCh:
  362. if !ok {
  363. return nil, errors.New("params channel closed")
  364. }
  365. return &params, nil
  366. }
  367. }
  368. // signal to caller to retry.
  369. return nil, nil
  370. }