zolfa
/
tendermint


								package consensus


								import (

									"bytes"

									"context"

									"errors"

									"fmt"

									"io"

									"io/ioutil"

									"os"

									"path"

									"runtime"

									"testing"

									"time"


									"github.com/stretchr/testify/require"


									"github.com/tendermint/abci/example/dummy"

									abci "github.com/tendermint/abci/types"

									crypto "github.com/tendermint/go-crypto"

									wire "github.com/tendermint/go-wire"

									auto "github.com/tendermint/tmlibs/autofile"

									cmn "github.com/tendermint/tmlibs/common"

									dbm "github.com/tendermint/tmlibs/db"


									cfg "github.com/tendermint/tendermint/config"

									"github.com/tendermint/tendermint/proxy"

									sm "github.com/tendermint/tendermint/state"

									"github.com/tendermint/tendermint/types"

									"github.com/tendermint/tmlibs/log"

								)


								// consensusReplayConfig is the configuration used by the WAL tests in this
								// file. It is assigned exactly once, in init.
								var consensusReplayConfig *cfg.Config


								func init() {

									// Build the config through ResetConfig under the "consensus_replay_test" name.
									consensusReplayConfig = ResetConfig("consensus_replay_test")

								}


								// These tests ensure we can always recover from failure at any part of the consensus process.

								// There are two general failure scenarios: failure during consensus, and failure while applying the block.

								// Only the latter interacts with the app and store,

								// but the former has to deal with restrictions on re-use of priv_validator keys.

								// The `WAL Tests` are for failures during the consensus;

								// the `Handshake Tests` are for failures in applying the block.

								// With the help of the WAL, we can recover from it all!


								// NOTE: The files in data_dir are generated by running the `build.sh` script found there.

								// It is a simple way to generate WALs for a single block, or multiple blocks, with random transactions

								// and different part sizes. The output is not deterministic.

								// It should only have to be re-run after a breaking change to the consensus data structures (e.g. blocks, votes)

								// or to the behaviour of the app (e.g. it computes the app hash differently).

								//
								// data_dir joins cmn.GoPath() (presumably the GOPATH root - confirm in tmlibs) with
								// the consensus package path and the "test_data" directory name.
								var data_dir = path.Join(cmn.GoPath(), "src/github.com/tendermint/tendermint/consensus", "test_data")


								//------------------------------------------------------------------------------------------

								// WAL Tests


								// TODO: It would be better to verify explicitly which states we can recover from without the wal

								// and which ones we need the wal for - then we'd also be able to only flush the

								// wal writer when we need to, instead of with every message.


								// startNewConsensusStateAndWaitForBlock builds a consensus state on top of the
								// given block and state DBs, starts it, and fails the test unless a new-block
								// event is published within 10 seconds. The consensus state is stopped before
								// returning.
								//
								// lastBlockHeight is accepted for the callers' convenience but is not used.
								func startNewConsensusStateAndWaitForBlock(t *testing.T, lastBlockHeight int64, blockDB dbm.DB, stateDB dbm.DB) {
									logger := log.TestingLogger()
									state, err := sm.GetState(stateDB, consensusReplayConfig.GenesisFile())
									require.NoError(t, err)
									state.SetLogger(logger.With("module", "state"))
									privValidator := loadPrivValidator(consensusReplayConfig)
									cs := newConsensusStateWithConfigAndBlockStore(consensusReplayConfig, state, privValidator, dummy.NewDummyApplication(), blockDB)
									cs.SetLogger(logger)

									// Dump the WAL purely as debugging output; a read failure is reported
									// but does not fail the test.
									walBytes, err := ioutil.ReadFile(cs.config.WalFile())
									if err != nil {
										t.Logf("could not read WAL file for debug output: %v", err)
									}
									// fmt.Printf("====== WAL: \n\r%s\n", walBytes)
									t.Logf("====== WAL: \n\r%s\n", walBytes)

									err = cs.Start()
									require.NoError(t, err)
									defer cs.Stop()

									// This is just a signal that we haven't halted; it's not something contained
									// in the WAL itself. Assuming the consensus state is running, replay of any
									// WAL, including the empty one, should eventually be followed by a new
									// block, or else something is wrong.
									newBlockCh := make(chan interface{}, 1)
									err = cs.eventBus.Subscribe(context.Background(), testSubscriber, types.EventQueryNewBlock, newBlockCh)
									require.NoError(t, err)
									select {
									case <-newBlockCh:
									case <-time.After(10 * time.Second):
										t.Fatalf("Timed out waiting for new block (see trace above)")
									}
								}


								// sendTxs pushes single-byte transactions into the consensus state's mempool
								// in a tight loop until ctx is cancelled. The payload is the low byte of a
								// running counter, so payloads repeat every 256 submissions. The result of
								// CheckTx is not inspected.
								func sendTxs(cs *ConsensusState, ctx context.Context) {
									for counter := 0; ; counter++ {
										// Non-blocking cancellation check before each submission.
										select {
										case <-ctx.Done():
											return
										default:
										}
										cs.mempool.CheckTx([]byte{byte(counter)}, nil)
									}
								}


								// TestWALCrash runs crashWALandCheckLiveness for several workloads to check
								// that consensus can recover from a WAL failure at any message.
								func TestWALCrash(t *testing.T) {
									type walCrashCase struct {
										name         string
										initFn       func(*ConsensusState, context.Context)
										heightToStop int64
									}

									// No transactions are sent, so blocks stay empty.
									noTxs := func(cs *ConsensusState, ctx context.Context) {}

									// Shrinks the block part size before sending transactions.
									smallParts := func(cs *ConsensusState, ctx context.Context) {
										// XXX: is there a better way to change BlockPartSizeBytes?
										p := cs.state.Params
										p.BlockPartSizeBytes = 512
										cs.state.Params = p
										sendTxs(cs, ctx)
									}

									cases := []walCrashCase{
										{name: "empty block", initFn: noTxs, heightToStop: 1},
										{name: "block with a smaller part size", initFn: smallParts, heightToStop: 1},
										{name: "many non-empty blocks", initFn: sendTxs, heightToStop: 3},
									}

									for _, tc := range cases {
										t.Run(tc.name, func(t *testing.T) {
											crashWALandCheckLiveness(t, tc.initFn, tc.heightToStop)
										})
									}
								}


								// crashWALandCheckLiveness repeatedly runs a fresh consensus state whose WAL
								// is a crashingWAL. After every simulated crash it checks that a new
								// consensus state built on the same DBs can still produce a block. It returns
								// once the WAL reports that heightToStop has been reached.
								//
								// initFn is run in its own goroutine for each iteration and is expected to
								// return when the context it receives is cancelled.
								func crashWALandCheckLiveness(t *testing.T, initFn func(*ConsensusState, context.Context), heightToStop int64) {
									walPaniced := make(chan error)
									crashingWal := &crashingWAL{panicCh: walPaniced, heightToStop: heightToStop}

									for i := 1; ; i++ {
										// fmt.Printf("====== LOOP %d\n", i)
										t.Logf("====== LOOP %d\n", i)

										if crashWALOnce(t, initFn, crashingWal) {
											return
										}
									}
								}

								// crashWALOnce runs a single crash-and-recover iteration and reports whether
								// the crashing WAL signalled that the target height was reached. The
								// consensus state and the initFn goroutine are always shut down on return,
								// including when the test fails part-way through.
								func crashWALOnce(t *testing.T, initFn func(*ConsensusState, context.Context), crashingWal *crashingWAL) bool {
									// create consensus state from a clean slate
									logger := log.NewNopLogger()
									stateDB := dbm.NewMemDB()
									state, err := sm.MakeGenesisStateFromFile(stateDB, consensusReplayConfig.GenesisFile())
									require.NoError(t, err)
									state.SetLogger(logger.With("module", "state"))
									privValidator := loadPrivValidator(consensusReplayConfig)
									blockDB := dbm.NewMemDB()
									cs := newConsensusStateWithConfigAndBlockStore(consensusReplayConfig, state, privValidator, dummy.NewDummyApplication(), blockDB)
									cs.SetLogger(logger)

									// start sending transactions; the deferred cancel stops the sender on
									// every exit path (it runs after the deferred cs.Stop below)
									ctx, cancel := context.WithCancel(context.Background())
									defer cancel()
									go initFn(cs, ctx)

									// clean up WAL file from the previous iteration (best effort: the
									// result of Remove is deliberately ignored)
									walFile := cs.config.WalFile()
									os.Remove(walFile) // nolint: errcheck

									// set crashing WAL
									csWal, err := cs.OpenWAL(walFile)
									require.NoError(t, err)
									crashingWal.next = csWal
									// reset the message counter
									crashingWal.msgIndex = 1
									cs.wal = crashingWal

									// start consensus state
									err = cs.Start()
									require.NoError(t, err)
									defer cs.Stop()

									select {
									case err := <-crashingWal.panicCh:
										t.Logf("WAL paniced: %v", err)

										// make sure we can make blocks after a crash
										startNewConsensusStateAndWaitForBlock(t, cs.Height, blockDB, stateDB)

										// if we reached the required height, the caller may exit
										_, reached := err.(ReachedHeightToStopError)
										return reached
									case <-time.After(10 * time.Second):
										t.Fatal("WAL did not panic for 10 seconds (check the log)")
									}
									return false
								}


								// crashingWAL is a WAL wrapper that simulates a crash during Save instead of
								// writing the message. It remembers the index of the message it last crashed
								// on (lastPanicedForMsgIndex) so that later runs get past that message and
								// crash on the next one instead.
								type crashingWAL struct {

									next         WAL // wrapped WAL that receives every message that is let through

									panicCh      chan error // simulated crashes are reported here

									heightToStop int64 // EndHeightMessage height at which ReachedHeightToStopError is sent


									msgIndex               int // current message index

									lastPanicedForMsgIndex int // last message for which we panicked

								}


								// WALWriteError is the error reported when a WAL write is made to fail.
								type WALWriteError struct {
									msg string
								}

								// Error implements the error interface and returns the stored message.
								func (writeErr WALWriteError) Error() string { return writeErr.msg }


								// ReachedHeightToStopError signals that the required consensus height has
								// been reached and the caller may stop.
								type ReachedHeightToStopError struct {
									height int64
								}

								// Error implements the error interface and includes the reached height.
								func (stop ReachedHeightToStopError) Error() string {
									const format = "reached height to stop %d"
									return fmt.Sprintf(format, stop.height)
								}


								// Save simulates a WAL crash by sending an error on panicCh and then
								// terminating the calling goroutine with runtime.Goexit.
								//
								// An EndHeightMessage for heightToStop reports ReachedHeightToStopError;
								// any other EndHeightMessage is forwarded to the wrapped WAL. Every other
								// message is forwarded until the message index moves past the index of the
								// last simulated crash, at which point a WALWriteError is reported.
								func (w *crashingWAL) Save(m WALMessage) {
									if endMsg, isEnd := m.(EndHeightMessage); isEnd {
										if endMsg.Height != w.heightToStop {
											w.next.Save(m)
											return
										}
										w.panicCh <- ReachedHeightToStopError{endMsg.Height}
										runtime.Goexit()
										return
									}

									// Already crashed on this message in an earlier run: let it through.
									if w.msgIndex <= w.lastPanicedForMsgIndex {
										w.msgIndex++
										w.next.Save(m)
										return
									}

									w.lastPanicedForMsgIndex = w.msgIndex
									// Caller(1) is the code that invoked Save, so this must stay inline here.
									_, file, line, _ := runtime.Caller(1)
									w.panicCh <- WALWriteError{fmt.Sprintf("failed to write %T to WAL (fileline: %s:%d)", m, file, line)}
									runtime.Goexit()
								}


								// Group delegates to the wrapped WAL.
								func (w *crashingWAL) Group() *auto.Group {
									return w.next.Group()
								}

								// SearchForEndHeight delegates to the wrapped WAL.
								func (w *crashingWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) {
									gr, found, err = w.next.SearchForEndHeight(height, options)
									return gr, found, err
								}

								// Start delegates to the wrapped WAL.
								func (w *crashingWAL) Start() error {
									return w.next.Start()
								}

								// Stop delegates to the wrapped WAL.
								func (w *crashingWAL) Stop() error {
									return w.next.Stop()
								}

								// Wait delegates to the wrapped WAL.
								func (w *crashingWAL) Wait() {
									w.next.Wait()
								}


								//------------------------------------------------------------------------------------------

								// Handshake Tests


								const (

									// NUM_BLOCKS is the number of blocks requested from WALWithNBlocks for the
									// handshake tests.
									NUM_BLOCKS = 6

								)


								var (

									// mempool is the mock mempool handed to State.ApplyBlock by applyBlock.
									mempool = types.MockMempool{}

								)


								//---------------------------------------

								// Test handshake/replay


								// modes lists the scenarios exercised by every handshake test:

								// 0 - all synced up

								// 1 - saved block but app and state are behind

								// 2 - saved block and committed but state is behind

								var modes = []uint{0, 1, 2}


								// TestHandshakeReplayAll syncs from scratch: the app has applied no blocks
								// before the handshake. Runs once per mode.
								func TestHandshakeReplayAll(t *testing.T) {
									for i := range modes {
										testHandshakeReplay(t, 0, modes[i])
									}
								}


								// TestHandshakeReplaySome syncs many blocks, but not from scratch: the app is
								// built with nBlocks = 1 before the handshake. Runs once per mode.
								func TestHandshakeReplaySome(t *testing.T) {
									for i := range modes {
										testHandshakeReplay(t, 1, modes[i])
									}
								}


								// TestHandshakeReplayOne syncs an app that lags by one block: it is built
								// with nBlocks = NUM_BLOCKS-1 before the handshake. Runs once per mode.
								func TestHandshakeReplayOne(t *testing.T) {
									for i := range modes {
										testHandshakeReplay(t, NUM_BLOCKS-1, modes[i])
									}
								}


								// TestHandshakeReplayNone syncs an app that is already caught up: it is built
								// with nBlocks = NUM_BLOCKS before the handshake. Runs once per mode.
								func TestHandshakeReplayNone(t *testing.T) {
									for i := range modes {
										testHandshakeReplay(t, NUM_BLOCKS, modes[i])
									}
								}


								// tempWALWithData writes data to a newly created temporary file (name prefix
								// "wal" in the default temp directory) and returns the file's path. It
								// panics if the file cannot be created, written or closed. The caller owns
								// the file; nothing here removes it.
								func tempWALWithData(data []byte) string {
									f, createErr := ioutil.TempFile("", "wal")
									if createErr != nil {
										panic(fmt.Errorf("failed to create temp WAL file: %v", createErr))
									}
									if _, writeErr := f.Write(data); writeErr != nil {
										panic(fmt.Errorf("failed to write to temp WAL file: %v", writeErr))
									}
									closeErr := f.Close()
									if closeErr != nil {
										panic(fmt.Errorf("failed to close temp WAL file: %v", closeErr))
									}
									return f.Name()
								}


								// testHandshakeReplay makes some blocks, starts a fresh app and applies
								// nBlocks blocks to it, then restarts the app through the handshake and
								// checks that it syncs up with the remaining blocks.
								//
								// The chain of NUM_BLOCKS blocks is rebuilt from a generated WAL. mode selects
								// one of the scenarios listed next to the modes variable. The test fails if
								// the app hash after the handshake differs from the expected one, or if the
								// handshaker reports an unexpected number of synced blocks.
								func testHandshakeReplay(t *testing.T, nBlocks int, mode uint) {
									config := ResetConfig("proxy_test_")

									walBody, err := WALWithNBlocks(NUM_BLOCKS)
									if err != nil {
										t.Fatal(err)
									}
									walFile := tempWALWithData(walBody)
									// Best-effort cleanup of the temp file; registered first so it runs
									// after the WAL has been stopped.
									defer os.Remove(walFile) // nolint: errcheck
									config.Consensus.SetWalFile(walFile)

									privVal := types.LoadPrivValidatorFS(config.PrivValidatorFile())

									wal, err := NewWAL(walFile, false)
									if err != nil {
										t.Fatal(err)
									}
									wal.SetLogger(log.TestingLogger())
									if err := wal.Start(); err != nil {
										t.Fatal(err)
									}
									defer wal.Stop() // nolint: errcheck
									chain, commits, err := makeBlockchainFromWAL(wal)
									if err != nil {
										// Not Fatalf: the error text must not be treated as a format string.
										t.Fatal(err)
									}

									state, store := stateAndStore(config, privVal.GetPubKey())
									store.chain = chain
									store.commits = commits

									// run the chain through state.ApplyBlock to build up the tendermint state
									latestAppHash := buildTMStateFromChain(config, state, chain, mode)

									// make a new client creator
									dummyApp := dummy.NewPersistentDummyApplication(path.Join(config.DBDir(), "2"))
									clientCreator2 := proxy.NewLocalClientCreator(dummyApp)
									if nBlocks > 0 {
										// run nBlocks against a new client to build up the app state.
										// use a throwaway tendermint state
										throwawayProxyApp := proxy.NewAppConns(clientCreator2, nil)
										throwawayState, _ := stateAndStore(config, privVal.GetPubKey())
										buildAppStateFromChain(throwawayProxyApp, throwawayState, chain, nBlocks, mode)
									}

									// now start the app using the handshake - it should sync
									handshaker := NewHandshaker(state, store)
									proxyApp := proxy.NewAppConns(clientCreator2, handshaker)
									if err := proxyApp.Start(); err != nil {
										t.Fatalf("Error starting proxy app connections: %v", err)
									}
									defer proxyApp.Stop()

									// get the latest app hash from the app
									res, err := proxyApp.Query().InfoSync(abci.RequestInfo{""})
									if err != nil {
										t.Fatal(err)
									}

									// the app hash should be synced up
									if !bytes.Equal(latestAppHash, res.LastBlockAppHash) {
										t.Fatalf("Expected app hashes to match after handshake/replay. got %X, expected %X", res.LastBlockAppHash, latestAppHash)
									}

									// In the lagging modes one extra block is expected on top of the
									// plain difference.
									expectedBlocksToSync := NUM_BLOCKS - nBlocks
									if nBlocks == NUM_BLOCKS && mode > 0 {
										expectedBlocksToSync++
									} else if nBlocks > 0 && mode == 1 {
										expectedBlocksToSync++
									}

									if handshaker.NBlocks() != expectedBlocksToSync {
										t.Fatalf("Expected handshake to sync %d blocks, got %d", expectedBlocksToSync, handshaker.NBlocks())
									}
								}


								// applyBlock applies blk to st through the proxy app's consensus connection,
								// using a part set header built with the state's configured block part size.
								// It panics if ApplyBlock returns an error.
								func applyBlock(st *sm.State, blk *types.Block, proxyApp proxy.AppConns) {
									consensusConn := proxyApp.Consensus()
									partsHeader := blk.MakePartSet(st.Params.BlockPartSizeBytes).Header()
									if err := st.ApplyBlock(types.NopEventBus{}, consensusConn, blk, partsHeader, mempool); err != nil {
										panic(err)
									}
								}


								// buildAppStateFromChain starts proxyApp without a handshake, initialises the
								// chain with the state's validators and plays blocks from chain against it:
								//   mode 0: the first nBlocks blocks
								//   mode 1: the first nBlocks-1 blocks
								//   mode 2: the first nBlocks-1 blocks followed by block nBlocks
								// It panics on any error and stops proxyApp before returning.
								func buildAppStateFromChain(proxyApp proxy.AppConns,
									state *sm.State, chain []*types.Block, nBlocks int, mode uint) {
									// start a new app without handshake, play nBlocks blocks
									startErr := proxyApp.Start()
									if startErr != nil {
										panic(startErr)
									}

									validators := types.TM2PB.Validators(state.Validators)
									_, initErr := proxyApp.Consensus().InitChainSync(abci.RequestInitChain{validators})
									if initErr != nil {
										panic(initErr)
									}

									defer proxyApp.Stop()

									// applyFirst applies chain[0] .. chain[count-1] in order.
									applyFirst := func(count int) {
										for idx := 0; idx < count; idx++ {
											applyBlock(state, chain[idx], proxyApp)
										}
									}

									switch mode {
									case 0:
										applyFirst(nBlocks)
									case 1:
										applyFirst(nBlocks - 1)
									case 2:
										applyFirst(nBlocks - 1)
										// also apply the last requested block, which updates the dummy
										// app's height and apphash
										applyBlock(state, chain[nBlocks-1], proxyApp)
									}
								}


								// buildTMStateFromChain runs chain against a fresh persistent dummy app
								// (stored under "<DBDir>/1") to build up the tendermint state and returns the
								// app hash expected after the whole chain has been applied.
								//
								// In mode 0 every block is applied to state. In modes 1 and 2 state is only
								// advanced to the penultimate block; the final block is applied to a copy so
								// the resulting app hash is known while state itself stays behind. Any other
								// mode returns nil. It panics on any error.
								func buildTMStateFromChain(config *cfg.Config, state *sm.State, chain []*types.Block, mode uint) []byte {
									// run the whole chain against this client to build up the tendermint state
									app := dummy.NewPersistentDummyApplication(path.Join(config.DBDir(), "1"))
									proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app), nil) // sm.NewHandshaker(config, state, store, ReplayLastBlock))
									startErr := proxyApp.Start()
									if startErr != nil {
										panic(startErr)
									}
									defer proxyApp.Stop()

									validators := types.TM2PB.Validators(state.Validators)
									_, initErr := proxyApp.Consensus().InitChainSync(abci.RequestInitChain{validators})
									if initErr != nil {
										panic(initErr)
									}

									switch mode {
									case 0:
										// sync right up
										for idx := range chain {
											applyBlock(state, chain[idx], proxyApp)
										}
										return state.AppHash
									case 1, 2:
										// sync up to the penultimate as if we stored the block.
										// whether we commit or not depends on the appHash
										lastIdx := len(chain) - 1
										for _, blk := range chain[:lastIdx] {
											applyBlock(state, blk, proxyApp)
										}

										// apply the final block to a state copy so we can
										// get the right next appHash but keep the state back
										ahead := state.Copy()
										applyBlock(ahead, chain[lastIdx], proxyApp)
										return ahead.AppHash
									}

									return nil
								}


								//--------------------------

								// utils for making blocks


								// makeBlockchainFromWAL rebuilds blocks and commits from the messages stored
								// in wal, reading from the end-height-0 marker onwards.
								//
								// Each proposal's part set header starts a new block, the block parts that
								// follow are collected into it, and every precommit vote becomes a
								// single-vote commit. An error is returned if the marker is missing, a
								// message or block cannot be decoded, a part cannot be added, or the WAL
								// contains no proposal at all.
								func makeBlockchainFromWAL(wal WAL) ([]*types.Block, []*types.Commit, error) {
									// Search for height marker
									gr, found, err := wal.SearchForEndHeight(0, &WALSearchOptions{})
									if err != nil {
										return nil, nil, err
									}
									if !found {
										return nil, nil, errors.New(cmn.Fmt("WAL does not contain height %d.", 1))
									}
									defer gr.Close() // nolint: errcheck

									// log.Notice("Build a blockchain by reading from the WAL")

									var blockParts *types.PartSet
									var blocks []*types.Block
									var commits []*types.Commit

									dec := NewWALDecoder(gr)
									for {
										msg, err := dec.Decode()
										if err == io.EOF {
											break
										} else if err != nil {
											return nil, nil, err
										}

										piece := readPieceFromWAL(msg)
										if piece == nil {
											continue
										}

										switch p := piece.(type) {
										case *types.PartSetHeader:
											// if it's not the first one, we have a full block
											if blockParts != nil {
												block, err := readBlockFromParts(blockParts)
												if err != nil {
													return nil, nil, err
												}
												blocks = append(blocks, block)
											}
											blockParts = types.NewPartSetFromHeader(*p)
										case *types.Part:
											// A part can only be collected once a proposal has opened a part set.
											if blockParts == nil {
												return nil, nil, errors.New("WAL contains a block part before any proposal")
											}
											if _, err := blockParts.AddPart(p, false); err != nil {
												return nil, nil, err
											}
										case *types.Vote:
											if p.Type == types.VoteTypePrecommit {
												commit := &types.Commit{
													BlockID:    p.BlockID,
													Precommits: []*types.Vote{p},
												}
												commits = append(commits, commit)
											}
										}
									}

									// grab the last block too
									if blockParts == nil {
										return nil, nil, errors.New("WAL does not contain any proposal")
									}
									block, err := readBlockFromParts(blockParts)
									if err != nil {
										return nil, nil, err
									}
									blocks = append(blocks, block)
									return blocks, commits, nil
								}

								// readBlockFromParts decodes a block from the reader of the given part set
								// and returns the decoding error, if any, instead of dropping it.
								func readBlockFromParts(parts *types.PartSet) (*types.Block, error) {
									var (
										n   int
										err error
									)
									decoded := wire.ReadBinary(&types.Block{}, parts.GetReader(), 0, &n, &err)
									if err != nil {
										return nil, err
									}
									block, ok := decoded.(*types.Block)
									if !ok {
										return nil, fmt.Errorf("unexpected type %T decoded from block parts", decoded)
									}
									return block, nil
								}


								// readPieceFromWAL extracts the part of a WAL message that is needed to
								// rebuild the chain: the block parts header of a proposal, a block part, or a
								// vote. EndHeightMessage entries and every other kind of message yield nil.
								func readPieceFromWAL(msg *TimedWALMessage) interface{} {
									switch walMsg := msg.Msg.(type) {
									case EndHeightMessage:
										// skip meta messages
										return nil
									case msgInfo:
										switch inner := walMsg.Msg.(type) {
										case *ProposalMessage:
											return &inner.Proposal.BlockPartsHeader
										case *BlockPartMessage:
											return inner.Part
										case *VoteMessage:
											return inner.Vote
										}
									}

									return nil
								}


								// stateAndStore returns a fresh state and mock store.
								//
								// The state is built from the genesis file named by config on top of a new
								// in-memory DB; the store is an empty mockBlockStore that uses the state's
								// consensus params. pubKey is currently unused. It panics if the genesis
								// state cannot be created, so the real cause is reported rather than a
								// later failure on an unusable state.
								func stateAndStore(config *cfg.Config, pubKey crypto.PubKey) (*sm.State, *mockBlockStore) {
									stateDB := dbm.NewMemDB()
									state, err := sm.MakeGenesisStateFromFile(stateDB, config.GenesisFile())
									if err != nil {
										panic(err)
									}
									state.SetLogger(log.TestingLogger().With("module", "state"))

									store := NewMockBlockStore(config, state.Params)
									return state, store
								}


								//----------------------------------

								// mock block store


								// mockBlockStore is an in-memory block store for the handshake tests. It
								// serves blocks and commits from slices that the test fills in directly.
								type mockBlockStore struct {
									config  *cfg.Config
									params  types.ConsensusParams
									chain   []*types.Block
									commits []*types.Commit
								}

								// NewMockBlockStore returns a mockBlockStore with no blocks or commits.
								// TODO: NewBlockStore(db.NewMemDB) ...
								func NewMockBlockStore(config *cfg.Config, params types.ConsensusParams) *mockBlockStore {
									return &mockBlockStore{
										config: config,
										params: params,
									}
								}


								// Height returns the number of blocks in the mock chain.
								func (bs *mockBlockStore) Height() int64 {
									return int64(len(bs.chain))
								}

								// LoadBlock returns the block at the given 1-based height.
								func (bs *mockBlockStore) LoadBlock(height int64) *types.Block {
									return bs.chain[height-1]
								}

								// LoadBlockMeta builds the meta for the block at the given 1-based height,
								// using the store's block part size to compute the part set header.
								func (bs *mockBlockStore) LoadBlockMeta(height int64) *types.BlockMeta {
									blk := bs.chain[height-1]
									hash := blk.Hash()
									partsHeader := blk.MakePartSet(bs.params.BlockPartSizeBytes).Header()
									return &types.BlockMeta{
										BlockID: types.BlockID{hash, partsHeader},
										Header:  blk.Header,
									}
								}

								// LoadBlockPart always returns nil.
								func (bs *mockBlockStore) LoadBlockPart(height int64, index int) *types.Part {
									return nil
								}

								// SaveBlock is a no-op.
								func (bs *mockBlockStore) SaveBlock(block *types.Block, blockParts *types.PartSet, seenCommit *types.Commit) {
								}

								// LoadBlockCommit returns the commit stored at index height-1.
								func (bs *mockBlockStore) LoadBlockCommit(height int64) *types.Commit {
									idx := height - 1
									return bs.commits[idx]
								}

								// LoadSeenCommit returns the same commit as LoadBlockCommit for that height.
								func (bs *mockBlockStore) LoadSeenCommit(height int64) *types.Commit {
									idx := height - 1
									return bs.commits[idx]
								}