You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

601 lines
18 KiB

8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
9 years ago
8 years ago
8 years ago
8 years ago
7 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
7 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
  1. package consensus
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path"
  10. "strings"
  11. "testing"
  12. "time"
  13. "github.com/tendermint/abci/example/dummy"
  14. abci "github.com/tendermint/abci/types"
  15. crypto "github.com/tendermint/go-crypto"
  16. wire "github.com/tendermint/go-wire"
  17. cmn "github.com/tendermint/tmlibs/common"
  18. dbm "github.com/tendermint/tmlibs/db"
  19. cfg "github.com/tendermint/tendermint/config"
  20. "github.com/tendermint/tendermint/proxy"
  21. sm "github.com/tendermint/tendermint/state"
  22. "github.com/tendermint/tendermint/types"
  23. "github.com/tendermint/tmlibs/log"
  24. )
  // init assigns the package-level config from ResetConfig, using the
  // "consensus_replay_test" name, when the package is initialised.
  func init() {
  	config = ResetConfig("consensus_replay_test")
  }
  28. // These tests ensure we can always recover from failure at any part of the consensus process.
  29. // There are two general failure scenarios: failure during consensus, and failure while applying the block.
  30. // Only the latter interacts with the app and store,
  31. // but the former has to deal with restrictions on re-use of priv_validator keys.
  32. // The `WAL Tests` are for failures during the consensus;
  33. // the `Handshake Tests` are for failures in applying the block.
  34. // With the help of the WAL, we can recover from it all!
  35. // NOTE: Files in this dir are generated by running the `build.sh` therein.
  36. // It's a simple way to generate wals for a single block, or multiple blocks, with random transactions,
  37. // and different part sizes. The output is not deterministic, and the stepChanges may need to be adjusted
  38. // after running it (eg. sometimes small_block2 will have 5 block parts, sometimes 6).
  39. // It should only have to be re-run if there is some breaking change to the consensus data structures (eg. blocks, votes)
  40. // or to the behaviour of the app (eg. computes app hash differently)
  // data_dir is the directory that holds the pre-generated WAL fixtures
  // (the *.cswal files) read by the tests below; it is built from cmn.GoPath.
  var data_dir = path.Join(cmn.GoPath, "src/github.com/tendermint/tendermint/consensus", "test_data")
  42. //------------------------------------------------------------------------------------------
  43. // WAL Tests
  44. // TODO: It would be better to verify explicitly which states we can recover from without the wal
  45. // and which ones we need the wal for - then we'd also be able to only flush the
  46. // wal writer when we need to, instead of with every message.
  // baseStepChanges holds the WAL line numbers at which the priv validator
  // changes step for a block with 1 validator and 1 block part.
  var baseStepChanges = []int{3, 6, 8}

  // testCases lists the WAL fixtures under test; recovery is exercised from
  // each line of each case's log.
  var testCases = []*testCase{
  	newTestCase("empty_block", baseStepChanges),   // empty block (has 1 block part)
  	newTestCase("small_block1", baseStepChanges),  // small block with txs in 1 block part
  	newTestCase("small_block2", []int{3, 11, 13}), // small block with txs across 6 smaller block parts
  }
  // testCase bundles one WAL fixture with the line numbers at which the
  // priv validator changes step.
  type testCase struct {
  	// name is the fixture's base file name (without the ".cswal" suffix).
  	name string
  	log  string //full cs wal

  	stepMap map[int]int8 // map lines of log to privval step

  	// proposeLine, prevoteLine and precommitLine are the three step-change
  	// lines the case was constructed with, in that order.
  	proposeLine   int
  	prevoteLine   int
  	precommitLine int
  }
  63. func newTestCase(name string, stepChanges []int) *testCase {
  64. if len(stepChanges) != 3 {
  65. panic(cmn.Fmt("a full wal has 3 step changes! Got array %v", stepChanges))
  66. }
  67. return &testCase{
  68. name: name,
  69. log: readWAL(path.Join(data_dir, name+".cswal")),
  70. stepMap: newMapFromChanges(stepChanges),
  71. proposeLine: stepChanges[0],
  72. prevoteLine: stepChanges[1],
  73. precommitLine: stepChanges[2],
  74. }
  75. }
  // newMapFromChanges maps every WAL line number to the priv validator step
  // that is current at that line.
  //
  // changes holds the ascending line numbers at which the step changes. Lines
  // below changes[0] map to step 0, lines in [changes[0], changes[1]) map to
  // step 1, and so on; the last change line itself maps to step len(changes).
  // An empty changes slice yields an empty map.
  //
  // The caller's slice (including any spare capacity behind it) is never
  // modified.
  func newMapFromChanges(changes []int) map[int]int8 {
  	m := make(map[int]int8)
  	if len(changes) == 0 {
  		return m
  	}

  	// Work on a private copy: appending to the caller's slice could write
  	// into its backing array when it has spare capacity.
  	bounds := make([]int, len(changes), len(changes)+1)
  	copy(bounds, changes)
  	// Add one extra boundary so the last step change is added to the map too.
  	bounds = append(bounds, changes[len(changes)-1]+1)

  	line := 0
  	for step, next := range bounds {
  		for ; line < next; line++ {
  			m[line] = int8(step)
  		}
  	}
  	return m
  }
  // readWAL returns the full contents of the file at p as a string.
  // It panics if the file cannot be read.
  func readWAL(p string) string {
  	contents, readErr := ioutil.ReadFile(p)
  	if readErr == nil {
  		return string(contents)
  	}
  	panic(readErr)
  }
  94. func writeWAL(walMsgs string) string {
  95. tempDir := os.TempDir()
  96. walDir := path.Join(tempDir, "/wal"+cmn.RandStr(12))
  97. walFile := path.Join(walDir, "wal")
  98. // Create WAL directory
  99. err := cmn.EnsureDir(walDir, 0700)
  100. if err != nil {
  101. panic(err)
  102. }
  103. // Write the needed WAL to file
  104. err = cmn.WriteFile(walFile, []byte(walMsgs), 0600)
  105. if err != nil {
  106. panic(err)
  107. }
  108. return walFile
  109. }
  110. func waitForBlock(newBlockCh chan interface{}, thisCase *testCase, i int) {
  111. after := time.After(time.Second * 10)
  112. select {
  113. case <-newBlockCh:
  114. case <-after:
  115. panic(cmn.Fmt("Timed out waiting for new block for case '%s' line %d", thisCase.name, i))
  116. }
  117. }
  118. func runReplayTest(t *testing.T, cs *ConsensusState, walFile string, newBlockCh chan interface{},
  119. thisCase *testCase, i int) {
  120. cs.config.SetWalFile(walFile)
  121. started, err := cs.Start()
  122. if err != nil {
  123. t.Fatalf("Cannot start consensus: %v", err)
  124. }
  125. if !started {
  126. t.Error("Consensus did not start")
  127. }
  128. // Wait to make a new block.
  129. // This is just a signal that we haven't halted; its not something contained in the WAL itself.
  130. // Assuming the consensus state is running, replay of any WAL, including the empty one,
  131. // should eventually be followed by a new block, or else something is wrong
  132. waitForBlock(newBlockCh, thisCase, i)
  133. cs.evsw.Stop()
  134. cs.Stop()
  135. LOOP:
  136. for {
  137. select {
  138. case <-newBlockCh:
  139. default:
  140. break LOOP
  141. }
  142. }
  143. cs.Wait()
  144. }
  145. func toPV(pv types.PrivValidator) *types.PrivValidatorFS {
  146. return pv.(*types.PrivValidatorFS)
  147. }
  148. func setupReplayTest(t *testing.T, thisCase *testCase, nLines int, crashAfter bool) (*ConsensusState, chan interface{}, string, string) {
  149. t.Log("-------------------------------------")
  150. t.Logf("Starting replay test %v (of %d lines of WAL). Crash after = %v", thisCase.name, nLines, crashAfter)
  151. lineStep := nLines
  152. if crashAfter {
  153. lineStep -= 1
  154. }
  155. split := strings.Split(thisCase.log, "\n")
  156. lastMsg := split[nLines]
  157. // we write those lines up to (not including) one with the signature
  158. walFile := writeWAL(strings.Join(split[:nLines], "\n") + "\n")
  159. cs := fixedConsensusStateDummy()
  160. // set the last step according to when we crashed vs the wal
  161. toPV(cs.privValidator).LastHeight = 1 // first block
  162. toPV(cs.privValidator).LastStep = thisCase.stepMap[lineStep]
  163. t.Logf("[WARN] setupReplayTest LastStep=%v", toPV(cs.privValidator).LastStep)
  164. newBlockCh := subscribeToEvent(cs.evsw, "tester", types.EventStringNewBlock(), 1)
  165. return cs, newBlockCh, lastMsg, walFile
  166. }
  167. func readTimedWALMessage(t *testing.T, walMsg string) TimedWALMessage {
  168. var err error
  169. var msg TimedWALMessage
  170. wire.ReadJSON(&msg, []byte(walMsg), &err)
  171. if err != nil {
  172. t.Fatalf("Error reading json data: %v", err)
  173. }
  174. return msg
  175. }
  176. //-----------------------------------------------
  177. // Test the log at every iteration, and set the privVal last step
  178. // as if the log was written after signing, before the crash
  179. func TestWALCrashAfterWrite(t *testing.T) {
  180. for _, thisCase := range testCases {
  181. split := strings.Split(thisCase.log, "\n")
  182. for i := 0; i < len(split)-1; i++ {
  183. cs, newBlockCh, _, walFile := setupReplayTest(t, thisCase, i+1, true)
  184. runReplayTest(t, cs, walFile, newBlockCh, thisCase, i+1)
  185. }
  186. }
  187. }
  188. //-----------------------------------------------
  189. // Test the log as if we crashed after signing but before writing.
  190. // This relies on privValidator.LastSignature being set
  191. func TestWALCrashBeforeWritePropose(t *testing.T) {
  192. for _, thisCase := range testCases {
  193. lineNum := thisCase.proposeLine
  194. // setup replay test where last message is a proposal
  195. cs, newBlockCh, proposalMsg, walFile := setupReplayTest(t, thisCase, lineNum, false)
  196. msg := readTimedWALMessage(t, proposalMsg)
  197. proposal := msg.Msg.(msgInfo).Msg.(*ProposalMessage)
  198. // Set LastSig
  199. toPV(cs.privValidator).LastSignBytes = types.SignBytes(cs.state.ChainID, proposal.Proposal)
  200. toPV(cs.privValidator).LastSignature = proposal.Proposal.Signature
  201. runReplayTest(t, cs, walFile, newBlockCh, thisCase, lineNum)
  202. }
  203. }
  204. func TestWALCrashBeforeWritePrevote(t *testing.T) {
  205. for _, thisCase := range testCases {
  206. testReplayCrashBeforeWriteVote(t, thisCase, thisCase.prevoteLine, types.EventStringCompleteProposal())
  207. }
  208. }
  209. func TestWALCrashBeforeWritePrecommit(t *testing.T) {
  210. for _, thisCase := range testCases {
  211. testReplayCrashBeforeWriteVote(t, thisCase, thisCase.precommitLine, types.EventStringPolka())
  212. }
  213. }
  214. func testReplayCrashBeforeWriteVote(t *testing.T, thisCase *testCase, lineNum int, eventString string) {
  215. // setup replay test where last message is a vote
  216. cs, newBlockCh, voteMsg, walFile := setupReplayTest(t, thisCase, lineNum, false)
  217. types.AddListenerForEvent(cs.evsw, "tester", eventString, func(data types.TMEventData) {
  218. msg := readTimedWALMessage(t, voteMsg)
  219. vote := msg.Msg.(msgInfo).Msg.(*VoteMessage)
  220. // Set LastSig
  221. toPV(cs.privValidator).LastSignBytes = types.SignBytes(cs.state.ChainID, vote.Vote)
  222. toPV(cs.privValidator).LastSignature = vote.Vote.Signature
  223. })
  224. runReplayTest(t, cs, walFile, newBlockCh, thisCase, lineNum)
  225. }
  226. //------------------------------------------------------------------------------------------
  227. // Handshake Tests
  var (
  	// NUM_BLOCKS is the number of blocks in the test_data/many_blocks.cswal
  	// fixture; the handshake tests derive their expected sync counts from it.
  	NUM_BLOCKS = 6 // number of blocks in the test_data/many_blocks.cswal
  	// mempool is the mock mempool handed to ApplyBlock by applyBlock.
  	mempool = types.MockMempool{}
  )
  232. //---------------------------------------
  // Test handshake/replay

  // modes lists the starting conditions exercised by the handshake tests:
  // 0 - all synced up
  // 1 - saved block but app and state are behind
  // 2 - saved block and committed but state is behind
  var modes = []uint{0, 1, 2}
  238. // Sync from scratch
  239. func TestHandshakeReplayAll(t *testing.T) {
  240. for _, m := range modes {
  241. testHandshakeReplay(t, 0, m)
  242. }
  243. }
  244. // Sync many, not from scratch
  245. func TestHandshakeReplaySome(t *testing.T) {
  246. for _, m := range modes {
  247. testHandshakeReplay(t, 1, m)
  248. }
  249. }
  250. // Sync from lagging by one
  251. func TestHandshakeReplayOne(t *testing.T) {
  252. for _, m := range modes {
  253. testHandshakeReplay(t, NUM_BLOCKS-1, m)
  254. }
  255. }
  256. // Sync from caught up
  257. func TestHandshakeReplayNone(t *testing.T) {
  258. for _, m := range modes {
  259. testHandshakeReplay(t, NUM_BLOCKS, m)
  260. }
  261. }
  262. // Make some blocks. Start a fresh app and apply nBlocks blocks. Then restart the app and sync it up with the remaining blocks
  263. func testHandshakeReplay(t *testing.T, nBlocks int, mode uint) {
  264. config := ResetConfig("proxy_test_")
  265. // copy the many_blocks file
  266. walBody, err := cmn.ReadFile(path.Join(data_dir, "many_blocks.cswal"))
  267. if err != nil {
  268. t.Fatal(err)
  269. }
  270. walFile := writeWAL(string(walBody))
  271. config.Consensus.SetWalFile(walFile)
  272. privVal := types.LoadPrivValidatorFS(config.PrivValidatorFile())
  273. wal, err := NewWAL(walFile, false)
  274. if err != nil {
  275. t.Fatal(err)
  276. }
  277. wal.SetLogger(log.TestingLogger())
  278. if _, err := wal.Start(); err != nil {
  279. t.Fatal(err)
  280. }
  281. chain, commits, err := makeBlockchainFromWAL(wal)
  282. if err != nil {
  283. t.Fatalf(err.Error())
  284. }
  285. state, store := stateAndStore(config, privVal.GetPubKey())
  286. store.chain = chain
  287. store.commits = commits
  288. // run the chain through state.ApplyBlock to build up the tendermint state
  289. latestAppHash := buildTMStateFromChain(config, state, chain, mode)
  290. // make a new client creator
  291. dummyApp := dummy.NewPersistentDummyApplication(path.Join(config.DBDir(), "2"))
  292. clientCreator2 := proxy.NewLocalClientCreator(dummyApp)
  293. if nBlocks > 0 {
  294. // run nBlocks against a new client to build up the app state.
  295. // use a throwaway tendermint state
  296. proxyApp := proxy.NewAppConns(clientCreator2, nil)
  297. state, _ := stateAndStore(config, privVal.GetPubKey())
  298. buildAppStateFromChain(proxyApp, state, chain, nBlocks, mode)
  299. }
  300. // now start the app using the handshake - it should sync
  301. handshaker := NewHandshaker(state, store)
  302. proxyApp := proxy.NewAppConns(clientCreator2, handshaker)
  303. if _, err := proxyApp.Start(); err != nil {
  304. t.Fatalf("Error starting proxy app connections: %v", err)
  305. }
  306. // get the latest app hash from the app
  307. res, err := proxyApp.Query().InfoSync(abci.RequestInfo{""})
  308. if err != nil {
  309. t.Fatal(err)
  310. }
  311. // the app hash should be synced up
  312. if !bytes.Equal(latestAppHash, res.LastBlockAppHash) {
  313. t.Fatalf("Expected app hashes to match after handshake/replay. got %X, expected %X", res.LastBlockAppHash, latestAppHash)
  314. }
  315. expectedBlocksToSync := NUM_BLOCKS - nBlocks
  316. if nBlocks == NUM_BLOCKS && mode > 0 {
  317. expectedBlocksToSync += 1
  318. } else if nBlocks > 0 && mode == 1 {
  319. expectedBlocksToSync += 1
  320. }
  321. if handshaker.NBlocks() != expectedBlocksToSync {
  322. t.Fatalf("Expected handshake to sync %d blocks, got %d", expectedBlocksToSync, handshaker.NBlocks())
  323. }
  324. }
  325. func applyBlock(st *sm.State, blk *types.Block, proxyApp proxy.AppConns) {
  326. testPartSize := st.Params().BlockPartSizeBytes
  327. err := st.ApplyBlock(nil, proxyApp.Consensus(), blk, blk.MakePartSet(testPartSize).Header(), mempool)
  328. if err != nil {
  329. panic(err)
  330. }
  331. }
  332. func buildAppStateFromChain(proxyApp proxy.AppConns,
  333. state *sm.State, chain []*types.Block, nBlocks int, mode uint) {
  334. // start a new app without handshake, play nBlocks blocks
  335. if _, err := proxyApp.Start(); err != nil {
  336. panic(err)
  337. }
  338. validators := types.TM2PB.Validators(state.Validators)
  339. proxyApp.Consensus().InitChainSync(abci.RequestInitChain{validators})
  340. defer proxyApp.Stop()
  341. switch mode {
  342. case 0:
  343. for i := 0; i < nBlocks; i++ {
  344. block := chain[i]
  345. applyBlock(state, block, proxyApp)
  346. }
  347. case 1, 2:
  348. for i := 0; i < nBlocks-1; i++ {
  349. block := chain[i]
  350. applyBlock(state, block, proxyApp)
  351. }
  352. if mode == 2 {
  353. // update the dummy height and apphash
  354. // as if we ran commit but not
  355. applyBlock(state, chain[nBlocks-1], proxyApp)
  356. }
  357. }
  358. }
  359. func buildTMStateFromChain(config *cfg.Config, state *sm.State, chain []*types.Block, mode uint) []byte {
  360. // run the whole chain against this client to build up the tendermint state
  361. clientCreator := proxy.NewLocalClientCreator(dummy.NewPersistentDummyApplication(path.Join(config.DBDir(), "1")))
  362. proxyApp := proxy.NewAppConns(clientCreator, nil) // sm.NewHandshaker(config, state, store, ReplayLastBlock))
  363. if _, err := proxyApp.Start(); err != nil {
  364. panic(err)
  365. }
  366. defer proxyApp.Stop()
  367. validators := types.TM2PB.Validators(state.Validators)
  368. proxyApp.Consensus().InitChainSync(abci.RequestInitChain{validators})
  369. var latestAppHash []byte
  370. switch mode {
  371. case 0:
  372. // sync right up
  373. for _, block := range chain {
  374. applyBlock(state, block, proxyApp)
  375. }
  376. latestAppHash = state.AppHash
  377. case 1, 2:
  378. // sync up to the penultimate as if we stored the block.
  379. // whether we commit or not depends on the appHash
  380. for _, block := range chain[:len(chain)-1] {
  381. applyBlock(state, block, proxyApp)
  382. }
  383. // apply the final block to a state copy so we can
  384. // get the right next appHash but keep the state back
  385. stateCopy := state.Copy()
  386. applyBlock(stateCopy, chain[len(chain)-1], proxyApp)
  387. latestAppHash = stateCopy.AppHash
  388. }
  389. return latestAppHash
  390. }
  391. //--------------------------
  392. // utils for making blocks
  393. func makeBlockchainFromWAL(wal *WAL) ([]*types.Block, []*types.Commit, error) {
  394. // Search for height marker
  395. gr, found, err := wal.group.Search("#ENDHEIGHT: ", makeHeightSearchFunc(0))
  396. if err != nil {
  397. return nil, nil, err
  398. }
  399. if !found {
  400. return nil, nil, errors.New(cmn.Fmt("WAL does not contain height %d.", 1))
  401. }
  402. defer gr.Close()
  403. // log.Notice("Build a blockchain by reading from the WAL")
  404. var blockParts *types.PartSet
  405. var blocks []*types.Block
  406. var commits []*types.Commit
  407. for {
  408. line, err := gr.ReadLine()
  409. if err != nil {
  410. if err == io.EOF {
  411. break
  412. } else {
  413. return nil, nil, err
  414. }
  415. }
  416. piece, err := readPieceFromWAL([]byte(line))
  417. if err != nil {
  418. return nil, nil, err
  419. }
  420. if piece == nil {
  421. continue
  422. }
  423. switch p := piece.(type) {
  424. case *types.PartSetHeader:
  425. // if its not the first one, we have a full block
  426. if blockParts != nil {
  427. var n int
  428. block := wire.ReadBinary(&types.Block{}, blockParts.GetReader(), 0, &n, &err).(*types.Block)
  429. blocks = append(blocks, block)
  430. }
  431. blockParts = types.NewPartSetFromHeader(*p)
  432. case *types.Part:
  433. _, err := blockParts.AddPart(p, false)
  434. if err != nil {
  435. return nil, nil, err
  436. }
  437. case *types.Vote:
  438. if p.Type == types.VoteTypePrecommit {
  439. commit := &types.Commit{
  440. BlockID: p.BlockID,
  441. Precommits: []*types.Vote{p},
  442. }
  443. commits = append(commits, commit)
  444. }
  445. }
  446. }
  447. // grab the last block too
  448. var n int
  449. block := wire.ReadBinary(&types.Block{}, blockParts.GetReader(), 0, &n, &err).(*types.Block)
  450. blocks = append(blocks, block)
  451. return blocks, commits, nil
  452. }
  453. func readPieceFromWAL(msgBytes []byte) (interface{}, error) {
  454. // Skip over empty and meta lines
  455. if len(msgBytes) == 0 || msgBytes[0] == '#' {
  456. return nil, nil
  457. }
  458. var err error
  459. var msg TimedWALMessage
  460. wire.ReadJSON(&msg, msgBytes, &err)
  461. if err != nil {
  462. fmt.Println("MsgBytes:", msgBytes, string(msgBytes))
  463. return nil, fmt.Errorf("Error reading json data: %v", err)
  464. }
  465. // for logging
  466. switch m := msg.Msg.(type) {
  467. case msgInfo:
  468. switch msg := m.Msg.(type) {
  469. case *ProposalMessage:
  470. return &msg.Proposal.BlockPartsHeader, nil
  471. case *BlockPartMessage:
  472. return msg.Part, nil
  473. case *VoteMessage:
  474. return msg.Vote, nil
  475. }
  476. }
  477. return nil, nil
  478. }
  479. // fresh state and mock store
  480. func stateAndStore(config *cfg.Config, pubKey crypto.PubKey) (*sm.State, *mockBlockStore) {
  481. stateDB := dbm.NewMemDB()
  482. state, _ := sm.MakeGenesisStateFromFile(stateDB, config.GenesisFile())
  483. state.SetLogger(log.TestingLogger().With("module", "state"))
  484. store := NewMockBlockStore(config, state.Params())
  485. return state, store
  486. }
  487. //----------------------------------
  488. // mock block store
  // mockBlockStore is an in-memory stand-in for a block store, backed by
  // plain slices of blocks and commits.
  type mockBlockStore struct {
  	config *cfg.Config
  	// params supplies the block part size used when building block metas.
  	params types.ConsensusParams
  	// chain holds the blocks; the block at height h is chain[h-1].
  	chain []*types.Block
  	// commits holds the commits; the commit for height h is commits[h-1].
  	commits []*types.Commit
  }
  495. // TODO: NewBlockStore(db.NewMemDB) ...
  496. func NewMockBlockStore(config *cfg.Config, params types.ConsensusParams) *mockBlockStore {
  497. return &mockBlockStore{config, params, nil, nil}
  498. }
  499. func (bs *mockBlockStore) Height() int { return len(bs.chain) }
  500. func (bs *mockBlockStore) LoadBlock(height int) *types.Block { return bs.chain[height-1] }
  501. func (bs *mockBlockStore) LoadBlockMeta(height int) *types.BlockMeta {
  502. block := bs.chain[height-1]
  503. return &types.BlockMeta{
  504. BlockID: types.BlockID{block.Hash(), block.MakePartSet(bs.params.BlockPartSizeBytes).Header()},
  505. Header: block.Header,
  506. }
  507. }
  508. func (bs *mockBlockStore) LoadBlockPart(height int, index int) *types.Part { return nil }
  509. func (bs *mockBlockStore) SaveBlock(block *types.Block, blockParts *types.PartSet, seenCommit *types.Commit) {
  510. }
  511. func (bs *mockBlockStore) LoadBlockCommit(height int) *types.Commit {
  512. return bs.commits[height-1]
  513. }
  514. func (bs *mockBlockStore) LoadSeenCommit(height int) *types.Commit {
  515. return bs.commits[height-1]
  516. }