- package consensus
-
- import (
- "bytes"
- "context"
- "errors"
- "fmt"
- "io"
- "io/ioutil"
- "os"
- "path"
- "runtime"
- "testing"
- "time"
-
- "github.com/stretchr/testify/require"
-
- "github.com/tendermint/abci/example/dummy"
- abci "github.com/tendermint/abci/types"
- crypto "github.com/tendermint/go-crypto"
- wire "github.com/tendermint/go-wire"
- auto "github.com/tendermint/tmlibs/autofile"
- cmn "github.com/tendermint/tmlibs/common"
- dbm "github.com/tendermint/tmlibs/db"
-
- cfg "github.com/tendermint/tendermint/config"
- "github.com/tendermint/tendermint/proxy"
- sm "github.com/tendermint/tendermint/state"
- "github.com/tendermint/tendermint/types"
- "github.com/tendermint/tmlibs/log"
- )
-
- var consensusReplayConfig *cfg.Config
-
- func init() {
- consensusReplayConfig = ResetConfig("consensus_replay_test")
- }
-
- // These tests ensure we can always recover from failure at any part of the consensus process.
- // There are two general failure scenarios: failure during consensus, and failure while applying the block.
- // Only the latter interacts with the app and store,
- // but the former has to deal with restrictions on re-use of priv_validator keys.
- // The `WAL Tests` are for failures during the consensus;
- // the `Handshake Tests` are for failures in applying the block.
- // With the help of the WAL, we can recover from it all!
-
- // NOTE: Files in this dir are generated by running the `build.sh` therein.
- // It's a simple way to generate wals for a single block, or multiple blocks, with random transactions,
- // and different part sizes. The output is not deterministic.
- // It should only have to be re-run if there is some breaking change to the consensus data structures (eg. blocks, votes)
- // or to the behaviour of the app (eg. computes app hash differently)
- var data_dir = path.Join(cmn.GoPath(), "src/github.com/tendermint/tendermint/consensus", "test_data")
-
- //------------------------------------------------------------------------------------------
- // WAL Tests
-
- // TODO: It would be better to verify explicitly which states we can recover from without the wal
- // and which ones we need the wal for - then we'd also be able to only flush the
- // wal writer when we need to, instead of with every message.
-
- func startNewConsensusStateAndWaitForBlock(t *testing.T, lastBlockHeight int, blockDB dbm.DB, stateDB dbm.DB) {
- logger := log.TestingLogger()
- state, _ := sm.GetState(stateDB, consensusReplayConfig.GenesisFile())
- state.SetLogger(logger.With("module", "state"))
- privValidator := loadPrivValidator(consensusReplayConfig)
- cs := newConsensusStateWithConfigAndBlockStore(consensusReplayConfig, state, privValidator, dummy.NewDummyApplication(), blockDB)
- cs.SetLogger(logger)
-
- bytes, _ := ioutil.ReadFile(cs.config.WalFile())
- // fmt.Printf("====== WAL: \n\r%s\n", bytes)
- t.Logf("====== WAL: \n\r%s\n", bytes)
-
- err := cs.Start()
- require.NoError(t, err)
- defer func() {
- cs.Stop()
- }()
-
- // This is just a signal that we haven't halted; its not something contained
- // in the WAL itself. Assuming the consensus state is running, replay of any
- // WAL, including the empty one, should eventually be followed by a new
- // block, or else something is wrong.
- newBlockCh := make(chan interface{}, 1)
- err = cs.eventBus.Subscribe(context.Background(), testSubscriber, types.EventQueryNewBlock, newBlockCh)
- require.NoError(t, err)
- select {
- case <-newBlockCh:
- case <-time.After(10 * time.Second):
- t.Fatalf("Timed out waiting for new block (see trace above)")
- }
- }
-
- func sendTxs(cs *ConsensusState, ctx context.Context) {
- i := 0
- for {
- select {
- case <-ctx.Done():
- return
- default:
- cs.mempool.CheckTx([]byte{byte(i)}, nil)
- i++
- }
- }
- }
-
- // TestWALCrash uses crashing WAL to test we can recover from any WAL failure.
- func TestWALCrash(t *testing.T) {
- testCases := []struct {
- name string
- initFn func(*ConsensusState, context.Context)
- heightToStop uint64
- }{
- {"empty block",
- func(cs *ConsensusState, ctx context.Context) {},
- 1},
- {"block with a smaller part size",
- func(cs *ConsensusState, ctx context.Context) {
- // XXX: is there a better way to change BlockPartSizeBytes?
- params := cs.state.Params
- params.BlockPartSizeBytes = 512
- cs.state.Params = params
- sendTxs(cs, ctx)
- },
- 1},
- {"many non-empty blocks",
- sendTxs,
- 3},
- }
-
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- crashWALandCheckLiveness(t, tc.initFn, tc.heightToStop)
- })
- }
- }
-
- func crashWALandCheckLiveness(t *testing.T, initFn func(*ConsensusState, context.Context), heightToStop uint64) {
- walPaniced := make(chan error)
- crashingWal := &crashingWAL{panicCh: walPaniced, heightToStop: heightToStop}
-
- i := 1
- LOOP:
- for {
- // fmt.Printf("====== LOOP %d\n", i)
- t.Logf("====== LOOP %d\n", i)
-
- // create consensus state from a clean slate
- logger := log.NewNopLogger()
- stateDB := dbm.NewMemDB()
- state, _ := sm.MakeGenesisStateFromFile(stateDB, consensusReplayConfig.GenesisFile())
- state.SetLogger(logger.With("module", "state"))
- privValidator := loadPrivValidator(consensusReplayConfig)
- blockDB := dbm.NewMemDB()
- cs := newConsensusStateWithConfigAndBlockStore(consensusReplayConfig, state, privValidator, dummy.NewDummyApplication(), blockDB)
- cs.SetLogger(logger)
-
- // start sending transactions
- ctx, cancel := context.WithCancel(context.Background())
- go initFn(cs, ctx)
-
- // clean up WAL file from the previous iteration
- walFile := cs.config.WalFile()
- os.Remove(walFile)
-
- // set crashing WAL
- csWal, err := cs.OpenWAL(walFile)
- require.NoError(t, err)
- crashingWal.next = csWal
- // reset the message counter
- crashingWal.msgIndex = 1
- cs.wal = crashingWal
-
- // start consensus state
- err = cs.Start()
- require.NoError(t, err)
-
- i++
-
- select {
- case err := <-walPaniced:
- t.Logf("WAL paniced: %v", err)
-
- // make sure we can make blocks after a crash
- startNewConsensusStateAndWaitForBlock(t, cs.Height, blockDB, stateDB)
-
- // stop consensus state and transactions sender (initFn)
- cs.Stop()
- cancel()
-
- // if we reached the required height, exit
- if _, ok := err.(ReachedHeightToStopError); ok {
- break LOOP
- }
- case <-time.After(10 * time.Second):
- t.Fatal("WAL did not panic for 10 seconds (check the log)")
- }
- }
- }
-
- // crashingWAL is a WAL which crashes or rather simulates a crash during Save
- // (before and after). It remembers a message for which we last panicked
- // (lastPanicedForMsgIndex), so we don't panic for it in subsequent iterations.
- type crashingWAL struct {
- next WAL
- panicCh chan error
- heightToStop uint64
-
- msgIndex int // current message index
- lastPanicedForMsgIndex int // last message for which we panicked
- }
-
- // WALWriteError indicates a WAL crash.
- type WALWriteError struct {
- msg string
- }
-
- func (e WALWriteError) Error() string {
- return e.msg
- }
-
- // ReachedHeightToStopError indicates we've reached the required consensus
- // height and may exit.
- type ReachedHeightToStopError struct {
- height uint64
- }
-
- func (e ReachedHeightToStopError) Error() string {
- return fmt.Sprintf("reached height to stop %d", e.height)
- }
-
- // Save simulate WAL's crashing by sending an error to the panicCh and then
- // exiting the cs.receiveRoutine.
- func (w *crashingWAL) Save(m WALMessage) {
- if endMsg, ok := m.(EndHeightMessage); ok {
- if endMsg.Height == w.heightToStop {
- w.panicCh <- ReachedHeightToStopError{endMsg.Height}
- runtime.Goexit()
- } else {
- w.next.Save(m)
- }
- return
- }
-
- if w.msgIndex > w.lastPanicedForMsgIndex {
- w.lastPanicedForMsgIndex = w.msgIndex
- _, file, line, _ := runtime.Caller(1)
- w.panicCh <- WALWriteError{fmt.Sprintf("failed to write %T to WAL (fileline: %s:%d)", m, file, line)}
- runtime.Goexit()
- } else {
- w.msgIndex++
- w.next.Save(m)
- }
- }
-
- func (w *crashingWAL) Group() *auto.Group { return w.next.Group() }
- func (w *crashingWAL) SearchForEndHeight(height uint64) (gr *auto.GroupReader, found bool, err error) {
- return w.next.SearchForEndHeight(height)
- }
-
- func (w *crashingWAL) Start() error { return w.next.Start() }
- func (w *crashingWAL) Stop() error { return w.next.Stop() }
- func (w *crashingWAL) Wait() { w.next.Wait() }
-
- //------------------------------------------------------------------------------------------
- // Handshake Tests
-
- var (
- NUM_BLOCKS = 6 // number of blocks in the test_data/many_blocks.cswal
- mempool = types.MockMempool{}
- )
-
- //---------------------------------------
- // Test handshake/replay
-
- // 0 - all synced up
- // 1 - saved block but app and state are behind
- // 2 - save block and committed but state is behind
- var modes = []uint{0, 1, 2}
-
- // Sync from scratch
- func TestHandshakeReplayAll(t *testing.T) {
- for _, m := range modes {
- testHandshakeReplay(t, 0, m)
- }
- }
-
- // Sync many, not from scratch
- func TestHandshakeReplaySome(t *testing.T) {
- for _, m := range modes {
- testHandshakeReplay(t, 1, m)
- }
- }
-
- // Sync from lagging by one
- func TestHandshakeReplayOne(t *testing.T) {
- for _, m := range modes {
- testHandshakeReplay(t, NUM_BLOCKS-1, m)
- }
- }
-
- // Sync from caught up
- func TestHandshakeReplayNone(t *testing.T) {
- for _, m := range modes {
- testHandshakeReplay(t, NUM_BLOCKS, m)
- }
- }
-
- func writeWAL(walMsgs []byte) string {
- walFile, err := ioutil.TempFile("", "wal")
- if err != nil {
- panic(fmt.Errorf("failed to create temp WAL file: %v", err))
- }
- _, err = walFile.Write(walMsgs)
- if err != nil {
- panic(fmt.Errorf("failed to write to temp WAL file: %v", err))
- }
- if err := walFile.Close(); err != nil {
- panic(fmt.Errorf("failed to close temp WAL file: %v", err))
- }
- return walFile.Name()
- }
-
- // Make some blocks. Start a fresh app and apply nBlocks blocks. Then restart the app and sync it up with the remaining blocks
- func testHandshakeReplay(t *testing.T, nBlocks int, mode uint) {
- config := ResetConfig("proxy_test_")
-
- // copy the many_blocks file
- walBody, err := cmn.ReadFile(path.Join(data_dir, "many_blocks.cswal"))
- if err != nil {
- t.Fatal(err)
- }
- walFile := writeWAL(walBody)
- config.Consensus.SetWalFile(walFile)
-
- privVal := types.LoadPrivValidatorFS(config.PrivValidatorFile())
-
- wal, err := NewWAL(walFile, false)
- if err != nil {
- t.Fatal(err)
- }
- wal.SetLogger(log.TestingLogger())
- if err := wal.Start(); err != nil {
- t.Fatal(err)
- }
- chain, commits, err := makeBlockchainFromWAL(wal)
- if err != nil {
- t.Fatalf(err.Error())
- }
-
- state, store := stateAndStore(config, privVal.GetPubKey())
- store.chain = chain
- store.commits = commits
-
- // run the chain through state.ApplyBlock to build up the tendermint state
- latestAppHash := buildTMStateFromChain(config, state, chain, mode)
-
- // make a new client creator
- dummyApp := dummy.NewPersistentDummyApplication(path.Join(config.DBDir(), "2"))
- clientCreator2 := proxy.NewLocalClientCreator(dummyApp)
- if nBlocks > 0 {
- // run nBlocks against a new client to build up the app state.
- // use a throwaway tendermint state
- proxyApp := proxy.NewAppConns(clientCreator2, nil)
- state, _ := stateAndStore(config, privVal.GetPubKey())
- buildAppStateFromChain(proxyApp, state, chain, nBlocks, mode)
- }
-
- // now start the app using the handshake - it should sync
- handshaker := NewHandshaker(state, store)
- proxyApp := proxy.NewAppConns(clientCreator2, handshaker)
- if err := proxyApp.Start(); err != nil {
- t.Fatalf("Error starting proxy app connections: %v", err)
- }
-
- // get the latest app hash from the app
- res, err := proxyApp.Query().InfoSync(abci.RequestInfo{""})
- if err != nil {
- t.Fatal(err)
- }
-
- // the app hash should be synced up
- if !bytes.Equal(latestAppHash, res.LastBlockAppHash) {
- t.Fatalf("Expected app hashes to match after handshake/replay. got %X, expected %X", res.LastBlockAppHash, latestAppHash)
- }
-
- expectedBlocksToSync := NUM_BLOCKS - nBlocks
- if nBlocks == NUM_BLOCKS && mode > 0 {
- expectedBlocksToSync += 1
- } else if nBlocks > 0 && mode == 1 {
- expectedBlocksToSync += 1
- }
-
- if handshaker.NBlocks() != expectedBlocksToSync {
- t.Fatalf("Expected handshake to sync %d blocks, got %d", expectedBlocksToSync, handshaker.NBlocks())
- }
- }
-
- func applyBlock(st *sm.State, blk *types.Block, proxyApp proxy.AppConns) {
- testPartSize := st.Params.BlockPartSizeBytes
- err := st.ApplyBlock(types.NopEventBus{}, proxyApp.Consensus(), blk, blk.MakePartSet(testPartSize).Header(), mempool)
- if err != nil {
- panic(err)
- }
- }
-
- func buildAppStateFromChain(proxyApp proxy.AppConns,
- state *sm.State, chain []*types.Block, nBlocks int, mode uint) {
- // start a new app without handshake, play nBlocks blocks
- if err := proxyApp.Start(); err != nil {
- panic(err)
- }
-
- validators := types.TM2PB.Validators(state.Validators)
- if _, err := proxyApp.Consensus().InitChainSync(abci.RequestInitChain{validators}); err != nil {
- panic(err)
- }
-
- defer proxyApp.Stop()
- switch mode {
- case 0:
- for i := 0; i < nBlocks; i++ {
- block := chain[i]
- applyBlock(state, block, proxyApp)
- }
- case 1, 2:
- for i := 0; i < nBlocks-1; i++ {
- block := chain[i]
- applyBlock(state, block, proxyApp)
- }
-
- if mode == 2 {
- // update the dummy height and apphash
- // as if we ran commit but not
- applyBlock(state, chain[nBlocks-1], proxyApp)
- }
- }
-
- }
-
- func buildTMStateFromChain(config *cfg.Config, state *sm.State, chain []*types.Block, mode uint) []byte {
- // run the whole chain against this client to build up the tendermint state
- clientCreator := proxy.NewLocalClientCreator(dummy.NewPersistentDummyApplication(path.Join(config.DBDir(), "1")))
- proxyApp := proxy.NewAppConns(clientCreator, nil) // sm.NewHandshaker(config, state, store, ReplayLastBlock))
- if err := proxyApp.Start(); err != nil {
- panic(err)
- }
- defer proxyApp.Stop()
-
- validators := types.TM2PB.Validators(state.Validators)
- if _, err := proxyApp.Consensus().InitChainSync(abci.RequestInitChain{validators}); err != nil {
- panic(err)
- }
-
- var latestAppHash []byte
-
- switch mode {
- case 0:
- // sync right up
- for _, block := range chain {
- applyBlock(state, block, proxyApp)
- }
-
- latestAppHash = state.AppHash
- case 1, 2:
- // sync up to the penultimate as if we stored the block.
- // whether we commit or not depends on the appHash
- for _, block := range chain[:len(chain)-1] {
- applyBlock(state, block, proxyApp)
- }
-
- // apply the final block to a state copy so we can
- // get the right next appHash but keep the state back
- stateCopy := state.Copy()
- applyBlock(stateCopy, chain[len(chain)-1], proxyApp)
- latestAppHash = stateCopy.AppHash
- }
-
- return latestAppHash
- }
-
- //--------------------------
- // utils for making blocks
-
- func makeBlockchainFromWAL(wal WAL) ([]*types.Block, []*types.Commit, error) {
- // Search for height marker
- gr, found, err := wal.SearchForEndHeight(0)
- if err != nil {
- return nil, nil, err
- }
- if !found {
- return nil, nil, errors.New(cmn.Fmt("WAL does not contain height %d.", 1))
- }
- defer gr.Close() // nolint: errcheck
-
- // log.Notice("Build a blockchain by reading from the WAL")
-
- var blockParts *types.PartSet
- var blocks []*types.Block
- var commits []*types.Commit
-
- dec := NewWALDecoder(gr)
- for {
- msg, err := dec.Decode()
- if err == io.EOF {
- break
- } else if err != nil {
- return nil, nil, err
- }
-
- piece := readPieceFromWAL(msg)
- if piece == nil {
- continue
- }
-
- switch p := piece.(type) {
- case *types.PartSetHeader:
- // if its not the first one, we have a full block
- if blockParts != nil {
- var n int
- block := wire.ReadBinary(&types.Block{}, blockParts.GetReader(), 0, &n, &err).(*types.Block)
- blocks = append(blocks, block)
- }
- blockParts = types.NewPartSetFromHeader(*p)
- case *types.Part:
- _, err := blockParts.AddPart(p, false)
- if err != nil {
- return nil, nil, err
- }
- case *types.Vote:
- if p.Type == types.VoteTypePrecommit {
- commit := &types.Commit{
- BlockID: p.BlockID,
- Precommits: []*types.Vote{p},
- }
- commits = append(commits, commit)
- }
- }
- }
- // grab the last block too
- var n int
- block := wire.ReadBinary(&types.Block{}, blockParts.GetReader(), 0, &n, &err).(*types.Block)
- blocks = append(blocks, block)
- return blocks, commits, nil
- }
-
- func readPieceFromWAL(msg *TimedWALMessage) interface{} {
- // skip meta messages
- if _, ok := msg.Msg.(EndHeightMessage); ok {
- return nil
- }
-
- // for logging
- switch m := msg.Msg.(type) {
- case msgInfo:
- switch msg := m.Msg.(type) {
- case *ProposalMessage:
- return &msg.Proposal.BlockPartsHeader
- case *BlockPartMessage:
- return msg.Part
- case *VoteMessage:
- return msg.Vote
- }
- }
-
- return nil
- }
-
- // fresh state and mock store
- func stateAndStore(config *cfg.Config, pubKey crypto.PubKey) (*sm.State, *mockBlockStore) {
- stateDB := dbm.NewMemDB()
- state, _ := sm.MakeGenesisStateFromFile(stateDB, config.GenesisFile())
- state.SetLogger(log.TestingLogger().With("module", "state"))
-
- store := NewMockBlockStore(config, state.Params)
- return state, store
- }
-
- //----------------------------------
- // mock block store
-
- type mockBlockStore struct {
- config *cfg.Config
- params types.ConsensusParams
- chain []*types.Block
- commits []*types.Commit
- }
-
- // TODO: NewBlockStore(db.NewMemDB) ...
- func NewMockBlockStore(config *cfg.Config, params types.ConsensusParams) *mockBlockStore {
- return &mockBlockStore{config, params, nil, nil}
- }
-
- func (bs *mockBlockStore) Height() int { return len(bs.chain) }
- func (bs *mockBlockStore) LoadBlock(height int) *types.Block { return bs.chain[height-1] }
- func (bs *mockBlockStore) LoadBlockMeta(height int) *types.BlockMeta {
- block := bs.chain[height-1]
- return &types.BlockMeta{
- BlockID: types.BlockID{block.Hash(), block.MakePartSet(bs.params.BlockPartSizeBytes).Header()},
- Header: block.Header,
- }
- }
- func (bs *mockBlockStore) LoadBlockPart(height int, index int) *types.Part { return nil }
- func (bs *mockBlockStore) SaveBlock(block *types.Block, blockParts *types.PartSet, seenCommit *types.Commit) {
- }
- func (bs *mockBlockStore) LoadBlockCommit(height int) *types.Commit {
- return bs.commits[height-1]
- }
- func (bs *mockBlockStore) LoadSeenCommit(height int) *types.Commit {
- return bs.commits[height-1]
- }
|