package statesync

import (
	"bytes"
	"context"
	"fmt"
	"math/rand"
	"strings"
	"sync"
	"time"

	dbm "github.com/tendermint/tm-db"

	"github.com/tendermint/tendermint/internal/p2p"
	sm "github.com/tendermint/tendermint/internal/state"
	"github.com/tendermint/tendermint/libs/log"
	"github.com/tendermint/tendermint/light"
	lightprovider "github.com/tendermint/tendermint/light/provider"
	lighthttp "github.com/tendermint/tendermint/light/provider/http"
	lightrpc "github.com/tendermint/tendermint/light/rpc"
	lightdb "github.com/tendermint/tendermint/light/store/db"
	ssproto "github.com/tendermint/tendermint/proto/tendermint/statesync"
	rpchttp "github.com/tendermint/tendermint/rpc/client/http"
	"github.com/tendermint/tendermint/types"
	"github.com/tendermint/tendermint/version"
)

//go:generate ../../scripts/mockery_generate.sh StateProvider

// StateProvider is a provider of trusted state data for bootstrapping a node.
// This refers to the state.State object, not the state machine. There are two
// implementations: one uses the P2P layer and the other uses the RPC layer.
// Both use light client verification.
type StateProvider interface {
	// AppHash returns the app hash after the given height has been committed.
	AppHash(ctx context.Context, height uint64) ([]byte, error)
	// Commit returns the commit at the given height.
	Commit(ctx context.Context, height uint64) (*types.Commit, error)
	// State returns a state object at the given height.
	State(ctx context.Context, height uint64) (sm.State, error)
}

type stateProviderRPC struct {
	sync.Mutex // light.Client is not concurrency-safe

	lc            *light.Client
	initialHeight int64
	providers     map[lightprovider.Provider]string
	logger        log.Logger
}

// NewRPCStateProvider creates a new StateProvider using a light client and RPC clients.
func NewRPCStateProvider(
	ctx context.Context,
	chainID string,
	initialHeight int64,
	servers []string,
	trustOptions light.TrustOptions,
	logger log.Logger,
) (StateProvider, error) {
	if len(servers) < 2 {
		return nil, fmt.Errorf("at least 2 RPC servers are required, got %d", len(servers))
	}

	providers := make([]lightprovider.Provider, 0, len(servers))
	providerRemotes := make(map[lightprovider.Provider]string)
	for _, server := range servers {
		client, err := rpcClient(server)
		if err != nil {
			return nil, fmt.Errorf("failed to set up RPC client: %w", err)
		}
		provider := lighthttp.NewWithClient(chainID, client)
		providers = append(providers, provider)
		// We store the RPC addresses keyed by provider, so we can find the
		// address of the primary provider used by the light client and use it
		// to fetch consensus parameters.
		providerRemotes[provider] = server
	}

	lc, err := light.NewClient(ctx, chainID, trustOptions, providers[0], providers[1:],
		lightdb.New(dbm.NewMemDB()), light.Logger(logger))
	if err != nil {
		return nil, err
	}
	return &stateProviderRPC{
		logger:        logger,
		lc:            lc,
		initialHeight: initialHeight,
		providers:     providerRemotes,
	}, nil
}
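
// As an illustrative usage sketch (all values below are placeholders, not part
// of this package's API): a caller bootstrapping state sync might wire up the
// RPC state provider like this, with trust options obtained out-of-band, e.g.
// from a block explorer or another trusted source:
//
//	trustOptions := light.TrustOptions{
//		Period: 168 * time.Hour, // example trusting period
//		Height: 10000,           // example trusted height
//		Hash:   trustedHash,     // header hash at that height, obtained out-of-band
//	}
//	sp, err := NewRPCStateProvider(ctx, "my-chain", 1,
//		[]string{"host1:26657", "host2:26657"}, trustOptions, logger)
//	if err != nil {
//		// handle error
//	}
//	state, err := sp.State(ctx, snapshotHeight)
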
func (s *stateProviderRPC) verifyLightBlockAtHeight(ctx context.Context, height uint64, ts time.Time) (*types.LightBlock, error) {
	ctx, cancel := context.WithTimeout(ctx, 20*time.Second)
	defer cancel()
	return s.lc.VerifyLightBlockAtHeight(ctx, int64(height), ts)
}

// AppHash implements part of StateProvider. It uses the light client to verify
// the blocks at heights h+1 and h+2 and, if verification succeeds, returns the
// app hash from the header at height h+1, which corresponds to the state after
// height h has been committed.
func (s *stateProviderRPC) AppHash(ctx context.Context, height uint64) ([]byte, error) {
	s.Lock()
	defer s.Unlock()

	// We have to fetch the next height, which contains the app hash for the previous height.
	header, err := s.verifyLightBlockAtHeight(ctx, height+1, time.Now())
	if err != nil {
		return nil, err
	}

	// We also try to fetch the block at H+2, since we need it when building
	// the state while restoring the snapshot. This avoids the race condition
	// where we try to restore a snapshot before H+2 exists.
	_, err = s.verifyLightBlockAtHeight(ctx, height+2, time.Now())
	if err != nil {
		return nil, err
	}
	return header.AppHash, nil
}

// Commit implements StateProvider.
func (s *stateProviderRPC) Commit(ctx context.Context, height uint64) (*types.Commit, error) {
	s.Lock()
	defer s.Unlock()
	header, err := s.verifyLightBlockAtHeight(ctx, height, time.Now())
	if err != nil {
		return nil, err
	}
	return header.Commit, nil
}

// State implements StateProvider.
func (s *stateProviderRPC) State(ctx context.Context, height uint64) (sm.State, error) {
	s.Lock()
	defer s.Unlock()

	state := sm.State{
		ChainID:       s.lc.ChainID(),
		InitialHeight: s.initialHeight,
	}
	if state.InitialHeight == 0 {
		state.InitialHeight = 1
	}

	// The snapshot height maps onto the state heights as follows:
	//
	// height:   last block, i.e. the snapshotted height
	// height+1: current block, i.e. the first block we'll process after the snapshot
	// height+2: next block, i.e. the second block after the snapshot
	//
	// We need to fetch the NextValidators from height+2 because if the
	// application changed the validator set at the snapshot height then this
	// only takes effect at height+2.
	lastLightBlock, err := s.verifyLightBlockAtHeight(ctx, height, time.Now())
	if err != nil {
		return sm.State{}, err
	}
	currentLightBlock, err := s.verifyLightBlockAtHeight(ctx, height+1, time.Now())
	if err != nil {
		return sm.State{}, err
	}
	nextLightBlock, err := s.verifyLightBlockAtHeight(ctx, height+2, time.Now())
	if err != nil {
		return sm.State{}, err
	}

	state.Version = sm.Version{
		Consensus: currentLightBlock.Version,
		Software:  version.TMVersion,
	}
	state.LastBlockHeight = lastLightBlock.Height
	state.LastBlockTime = lastLightBlock.Time
	state.LastBlockID = lastLightBlock.Commit.BlockID
	state.AppHash = currentLightBlock.AppHash
	state.LastResultsHash = currentLightBlock.LastResultsHash
	state.LastValidators = lastLightBlock.ValidatorSet
	state.Validators = currentLightBlock.ValidatorSet
	state.NextValidators = nextLightBlock.ValidatorSet
	state.LastHeightValidatorsChanged = nextLightBlock.Height
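
	// As a worked example with illustrative numbers: restoring a snapshot
	// taken at height 100 verifies blocks 100, 101, and 102. Block 100 fills
	// the LastBlock* fields, block 101 supplies AppHash and Validators, and
	// block 102 supplies NextValidators.
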
	// We'll also need to fetch consensus params via RPC, using light client verification.
	primaryURL, ok := s.providers[s.lc.Primary()]
	if !ok || primaryURL == "" {
		return sm.State{}, fmt.Errorf("could not find address for primary light client provider")
	}
	primaryRPC, err := rpcClient(primaryURL)
	if err != nil {
		return sm.State{}, fmt.Errorf("unable to create RPC client: %w", err)
	}
	rpcclient := lightrpc.NewClient(s.logger, primaryRPC, s.lc)
	result, err := rpcclient.ConsensusParams(ctx, &currentLightBlock.Height)
	if err != nil {
		return sm.State{}, fmt.Errorf("unable to fetch consensus parameters for height %v: %w",
			currentLightBlock.Height, err)
	}
	state.ConsensusParams = result.ConsensusParams
	state.LastHeightConsensusParamsChanged = currentLightBlock.Height

	return state, nil
}

// rpcClient sets up a new RPC client, defaulting to the http scheme if the
// server address does not specify one.
func rpcClient(server string) (*rpchttp.HTTP, error) {
	if !strings.Contains(server, "://") {
		server = "http://" + server
	}
	return rpchttp.New(server)
}
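
// For illustration: rpcClient("10.0.0.1:26657") dials "http://10.0.0.1:26657",
// while rpcClient("https://rpc.example.com") keeps the scheme it was given.
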
type stateProviderP2P struct {
	sync.Mutex // light.Client is not concurrency-safe

	lc            *light.Client
	initialHeight int64
	paramsSendCh  *p2p.Channel
	paramsRecvCh  chan types.ConsensusParams
}

// NewP2PStateProvider creates a light client state provider but uses a
// dispatcher connected to the P2P layer.
func NewP2PStateProvider(
	ctx context.Context,
	chainID string,
	initialHeight int64,
	providers []lightprovider.Provider,
	trustOptions light.TrustOptions,
	paramsSendCh *p2p.Channel,
	logger log.Logger,
) (StateProvider, error) {
	if len(providers) < 2 {
		return nil, fmt.Errorf("at least 2 peers are required, got %d", len(providers))
	}

	lc, err := light.NewClient(ctx, chainID, trustOptions, providers[0], providers[1:],
		lightdb.New(dbm.NewMemDB()), light.Logger(logger))
	if err != nil {
		return nil, err
	}

	return &stateProviderP2P{
		lc:            lc,
		initialHeight: initialHeight,
		paramsSendCh:  paramsSendCh,
		paramsRecvCh:  make(chan types.ConsensusParams),
	}, nil
}

func (s *stateProviderP2P) verifyLightBlockAtHeight(ctx context.Context, height uint64, ts time.Time) (*types.LightBlock, error) {
	ctx, cancel := context.WithTimeout(ctx, 20*time.Second)
	defer cancel()
	return s.lc.VerifyLightBlockAtHeight(ctx, int64(height), ts)
}

// AppHash implements StateProvider.
func (s *stateProviderP2P) AppHash(ctx context.Context, height uint64) ([]byte, error) {
	s.Lock()
	defer s.Unlock()

	// We have to fetch the next height, which contains the app hash for the previous height.
	header, err := s.verifyLightBlockAtHeight(ctx, height+1, time.Now())
	if err != nil {
		return nil, err
	}

	// We also try to fetch the block at H+2, since we need it when building
	// the state while restoring the snapshot. This avoids the race condition
	// where we try to restore a snapshot before H+2 exists.
	_, err = s.verifyLightBlockAtHeight(ctx, height+2, time.Now())
	if err != nil {
		return nil, err
	}
	return header.AppHash, nil
}

// Commit implements StateProvider.
func (s *stateProviderP2P) Commit(ctx context.Context, height uint64) (*types.Commit, error) {
	s.Lock()
	defer s.Unlock()
	header, err := s.verifyLightBlockAtHeight(ctx, height, time.Now())
	if err != nil {
		return nil, err
	}
	return header.Commit, nil
}

// State implements StateProvider.
func (s *stateProviderP2P) State(ctx context.Context, height uint64) (sm.State, error) {
	s.Lock()
	defer s.Unlock()

	state := sm.State{
		ChainID:       s.lc.ChainID(),
		InitialHeight: s.initialHeight,
	}
	if state.InitialHeight == 0 {
		state.InitialHeight = 1
	}

	// The snapshot height maps onto the state heights as follows:
	//
	// height:   last block, i.e. the snapshotted height
	// height+1: current block, i.e. the first block we'll process after the snapshot
	// height+2: next block, i.e. the second block after the snapshot
	//
	// We need to fetch the NextValidators from height+2 because if the
	// application changed the validator set at the snapshot height then this
	// only takes effect at height+2.
	lastLightBlock, err := s.verifyLightBlockAtHeight(ctx, height, time.Now())
	if err != nil {
		return sm.State{}, err
	}
	currentLightBlock, err := s.verifyLightBlockAtHeight(ctx, height+1, time.Now())
	if err != nil {
		return sm.State{}, err
	}
	nextLightBlock, err := s.verifyLightBlockAtHeight(ctx, height+2, time.Now())
	if err != nil {
		return sm.State{}, err
	}

	state.Version = sm.Version{
		Consensus: currentLightBlock.Version,
		Software:  version.TMVersion,
	}
	state.LastBlockHeight = lastLightBlock.Height
	state.LastBlockTime = lastLightBlock.Time
	state.LastBlockID = lastLightBlock.Commit.BlockID
	state.AppHash = currentLightBlock.AppHash
	state.LastResultsHash = currentLightBlock.LastResultsHash
	state.LastValidators = lastLightBlock.ValidatorSet
	state.Validators = currentLightBlock.ValidatorSet
	state.NextValidators = nextLightBlock.ValidatorSet
	state.LastHeightValidatorsChanged = nextLightBlock.Height

	// We'll also need to fetch consensus params via P2P.
	state.ConsensusParams, err = s.consensusParams(ctx, currentLightBlock.Height)
	if err != nil {
		return sm.State{}, fmt.Errorf("fetching consensus params: %w", err)
	}
	// Validate the consensus params against the hash committed to in the header.
	if !bytes.Equal(nextLightBlock.ConsensusHash, state.ConsensusParams.HashConsensusParams()) {
		return sm.State{}, fmt.Errorf("consensus params hash mismatch at height %d. Expected %v, got %v",
			currentLightBlock.Height, nextLightBlock.ConsensusHash, state.ConsensusParams.HashConsensusParams())
	}
	// Set the last height changed to the current height.
	state.LastHeightConsensusParamsChanged = currentLightBlock.Height

	return state, nil
}

// addProvider dynamically adds a peer as a new witness. A limit of 6 providers
// is kept as a heuristic: too many overburdens the network and too few
// compromises this second layer of security.
func (s *stateProviderP2P) addProvider(p lightprovider.Provider) {
	if len(s.lc.Witnesses()) < 6 {
		s.lc.AddProvider(p)
	}
}
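
// As an illustration of the retry schedule in consensusParams below (assuming
// consensusParamsResponseTimeout, defined elsewhere in this package, is on the
// order of seconds): attempt 1 waits 1×timeout with no jitter (rand.Int63n(1)
// is always 0), attempt 2 waits 2×timeout plus up to 100ms of jitter, attempt
// 3 waits 3×timeout plus up to 200ms, and so on, so that retries to the
// witnesses do not fire in lockstep.
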
// consensusParams sends out a request for consensus params, blocking until one
// is returned.
//
// It attempts to send requests to all witnesses in parallel, but if none
// responds it will retry them all sometime later until it receives some
// response. This operation will block until it receives a response or the
// context is canceled.
func (s *stateProviderP2P) consensusParams(ctx context.Context, height int64) (types.ConsensusParams, error) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	out := make(chan types.ConsensusParams)

	// retryAll sends one round of requests to every witness and returns a
	// channel that is closed once all of the per-witness goroutines have
	// finished.
	retryAll := func() (<-chan struct{}, error) {
		wg := &sync.WaitGroup{}

		for _, provider := range s.lc.Witnesses() {
			p, ok := provider.(*BlockProvider)
			if !ok {
				return nil, fmt.Errorf("witness is not BlockProvider [%T]", provider)
			}

			peer, err := types.NewNodeID(p.String())
			if err != nil {
				return nil, fmt.Errorf("invalid provider (%s) node id: %w", p.String(), err)
			}

			wg.Add(1)
			go func(p *BlockProvider, peer types.NodeID) {
				defer wg.Done()

				timer := time.NewTimer(0)
				defer timer.Stop()
				var iterCount int64

				for {
					iterCount++
					if err := s.paramsSendCh.Send(ctx, p2p.Envelope{
						To: peer,
						Message: &ssproto.ParamsRequest{
							Height: uint64(height),
						},
					}); err != nil {
						// This only errors if the context is canceled, which we
						// don't need to propagate here.
						return
					}

					// Jitter and backoff the retry loop.
					timer.Reset(time.Duration(iterCount)*consensusParamsResponseTimeout +
						time.Duration(100*rand.Int63n(iterCount))*time.Millisecond) // nolint:gosec

					select {
					case <-timer.C:
						continue
					case <-ctx.Done():
						return
					case params, ok := <-s.paramsRecvCh:
						if !ok {
							return
						}
						select {
						case <-ctx.Done():
							return
						case out <- params:
							return
						}
					}
				}
			}(p, peer)
		}

		sig := make(chan struct{})
		go func() { wg.Wait(); close(sig) }()
		return sig, nil
	}

	timer := time.NewTimer(0)
	defer timer.Stop()
	var iterCount int64
	for {
		iterCount++
		sig, err := retryAll()
		if err != nil {
			return types.ConsensusParams{}, err
		}

		select {
		case <-sig:
			// Jitter and backoff the retry loop.
			timer.Reset(time.Duration(iterCount)*consensusParamsResponseTimeout +
				time.Duration(100*rand.Int63n(iterCount))*time.Millisecond) // nolint:gosec
			select {
			case param := <-out:
				return param, nil
			case <-ctx.Done():
				return types.ConsensusParams{}, ctx.Err()
			case <-timer.C:
			}
		case <-ctx.Done():
			return types.ConsensusParams{}, ctx.Err()
		case param := <-out:
			return param, nil
		}
	}
}
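
// As an illustrative construction sketch for the P2P variant (the peer IDs,
// channel, and trust options are placeholders, and the BlockProvider /
// dispatcher wiring is assumed to follow this package's conventions):
//
//	providers := []lightprovider.Provider{
//		NewBlockProvider(peerA, chainID, dispatcher),
//		NewBlockProvider(peerB, chainID, dispatcher),
//	}
//	sp, err := NewP2PStateProvider(ctx, chainID, 1, providers,
//		trustOptions, paramsCh, logger)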