You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

641 lines
19 KiB

8 years ago
8 years ago
9 years ago
8 years ago
8 years ago
  1. package consensus
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path"
  10. "strings"
  11. "testing"
  12. "time"
  13. "github.com/tendermint/tendermint/config/tendermint_test"
  14. "github.com/tendermint/abci/example/dummy"
  15. cmn "github.com/tendermint/go-common"
  16. cfg "github.com/tendermint/go-config"
  17. "github.com/tendermint/go-crypto"
  18. dbm "github.com/tendermint/go-db"
  19. "github.com/tendermint/go-wire"
  20. "github.com/tendermint/tendermint/proxy"
  21. sm "github.com/tendermint/tendermint/state"
  22. "github.com/tendermint/tendermint/types"
  23. )
  24. func init() {
  25. config = tendermint_test.ResetConfig("consensus_replay_test")
  26. }
  27. // These tests ensure we can always recover from failure at any part of the consensus process.
  28. // There are two general failure scenarios: failure during consensus, and failure while applying the block.
  29. // Only the latter interacts with the app and store,
  30. // but the former has to deal with restrictions on re-use of priv_validator keys.
  31. // The `WAL Tests` are for failures during the consensus;
  32. // the `Handshake Tests` are for failures in applying the block.
  33. // With the help of the WAL, we can recover from it all!
  34. var data_dir = path.Join(cmn.GoPath, "src/github.com/tendermint/tendermint/consensus", "test_data")
  35. //------------------------------------------------------------------------------------------
  36. // WAL Tests
  37. // TODO: It would be better to verify explicitly which states we can recover from without the wal
  38. // and which ones we need the wal for - then we'd also be able to only flush the
  39. // wal writer when we need to, instead of with every message.
  40. // the priv validator changes step at these lines for a block with 1 val and 1 part
  41. var baseStepChanges = []int{3, 6, 8}
  42. // test recovery from each line in each testCase
  43. var testCases = []*testCase{
  44. newTestCase("empty_block", baseStepChanges), // empty block (has 1 block part)
  45. newTestCase("small_block1", baseStepChanges), // small block with txs in 1 block part
  46. newTestCase("small_block2", []int{3, 10, 12}), // small block with txs across 5 smaller block parts
  47. }
  // testCase describes one recorded consensus WAL together with the lines at
  // which the priv validator changes step.
  type testCase struct {
  	name string
  	log  string // full cs wal

  	// stepMap maps lines of log to the privval step in effect at that line.
  	stepMap map[int]int8

  	// WAL lines of the three step changes, in order.
  	proposeLine, prevoteLine, precommitLine int
  }
  56. func newTestCase(name string, stepChanges []int) *testCase {
  57. if len(stepChanges) != 3 {
  58. panic(cmn.Fmt("a full wal has 3 step changes! Got array %v", stepChanges))
  59. }
  60. return &testCase{
  61. name: name,
  62. log: readWAL(path.Join(data_dir, name+".cswal")),
  63. stepMap: newMapFromChanges(stepChanges),
  64. proposeLine: stepChanges[0],
  65. prevoteLine: stepChanges[1],
  66. precommitLine: stepChanges[2],
  67. }
  68. }
  // newMapFromChanges builds a lookup from WAL line number to priv validator
  // step. changes holds the (ascending) lines at which the step changes: every
  // line below changes[0] maps to step 0, lines in [changes[0], changes[1]) to
  // step 1, and so on. The last change line itself is included in the map and
  // maps to step len(changes).
  //
  // The input slice is never modified; an empty input yields an empty map.
  func newMapFromChanges(changes []int) map[int]int8 {
  	m := make(map[int]int8)
  	if len(changes) == 0 {
  		return m
  	}

  	// Work on a private copy: appending directly to the caller's slice could
  	// overwrite an element living past its length in a shared backing array.
  	bounds := make([]int, len(changes), len(changes)+1)
  	copy(bounds, changes)
  	// One extra bound so that the last step change is added to the map too.
  	bounds = append(bounds, bounds[len(bounds)-1]+1)

  	line := 0
  	for step, bound := range bounds {
  		for ; line < bound; line++ {
  			m[line] = int8(step)
  		}
  	}
  	return m
  }
  // readWAL returns the entire contents of the file at p as a string.
  // It panics if the file cannot be read.
  func readWAL(p string) string {
  	contents, readErr := ioutil.ReadFile(p)
  	if readErr == nil {
  		return string(contents)
  	}
  	panic(readErr)
  }
  87. func writeWAL(walMsgs string) string {
  88. tempDir := os.TempDir()
  89. walDir := path.Join(tempDir, "/wal"+cmn.RandStr(12))
  90. walFile := path.Join(walDir, "wal")
  91. // Create WAL directory
  92. err := cmn.EnsureDir(walDir, 0700)
  93. if err != nil {
  94. panic(err)
  95. }
  96. // Write the needed WAL to file
  97. err = cmn.WriteFile(walFile, []byte(walMsgs), 0600)
  98. if err != nil {
  99. panic(err)
  100. }
  101. return walFile
  102. }
  103. func waitForBlock(newBlockCh chan interface{}, thisCase *testCase, i int) {
  104. after := time.After(time.Second * 10)
  105. select {
  106. case <-newBlockCh:
  107. case <-after:
  108. panic(cmn.Fmt("Timed out waiting for new block for case '%s' line %d", thisCase.name, i))
  109. }
  110. }
  111. func runReplayTest(t *testing.T, cs *ConsensusState, walFile string, newBlockCh chan interface{},
  112. thisCase *testCase, i int) {
  113. cs.config.Set("cs_wal_file", walFile)
  114. cs.Start()
  115. // Wait to make a new block.
  116. // This is just a signal that we haven't halted; its not something contained in the WAL itself.
  117. // Assuming the consensus state is running, replay of any WAL, including the empty one,
  118. // should eventually be followed by a new block, or else something is wrong
  119. waitForBlock(newBlockCh, thisCase, i)
  120. cs.evsw.Stop()
  121. cs.Stop()
  122. LOOP:
  123. for {
  124. select {
  125. case <-newBlockCh:
  126. default:
  127. break LOOP
  128. }
  129. }
  130. cs.Wait()
  131. }
  132. func toPV(pv PrivValidator) *types.PrivValidator {
  133. return pv.(*types.PrivValidator)
  134. }
  135. func setupReplayTest(thisCase *testCase, nLines int, crashAfter bool) (*ConsensusState, chan interface{}, string, string) {
  136. fmt.Println("-------------------------------------")
  137. log.Notice(cmn.Fmt("Starting replay test %v (of %d lines of WAL). Crash after = %v", thisCase.name, nLines, crashAfter))
  138. lineStep := nLines
  139. if crashAfter {
  140. lineStep -= 1
  141. }
  142. split := strings.Split(thisCase.log, "\n")
  143. lastMsg := split[nLines]
  144. // we write those lines up to (not including) one with the signature
  145. walFile := writeWAL(strings.Join(split[:nLines], "\n") + "\n")
  146. cs := fixedConsensusStateDummy()
  147. // set the last step according to when we crashed vs the wal
  148. toPV(cs.privValidator).LastHeight = 1 // first block
  149. toPV(cs.privValidator).LastStep = thisCase.stepMap[lineStep]
  150. log.Warn("setupReplayTest", "LastStep", toPV(cs.privValidator).LastStep)
  151. newBlockCh := subscribeToEvent(cs.evsw, "tester", types.EventStringNewBlock(), 1)
  152. return cs, newBlockCh, lastMsg, walFile
  153. }
  154. func readTimedWALMessage(t *testing.T, walMsg string) TimedWALMessage {
  155. var err error
  156. var msg TimedWALMessage
  157. wire.ReadJSON(&msg, []byte(walMsg), &err)
  158. if err != nil {
  159. t.Fatalf("Error reading json data: %v", err)
  160. }
  161. return msg
  162. }
  163. //-----------------------------------------------
  164. // Test the log at every iteration, and set the privVal last step
  165. // as if the log was written after signing, before the crash
  166. func TestWALCrashAfterWrite(t *testing.T) {
  167. for _, thisCase := range testCases {
  168. split := strings.Split(thisCase.log, "\n")
  169. for i := 0; i < len(split)-1; i++ {
  170. cs, newBlockCh, _, walFile := setupReplayTest(thisCase, i+1, true)
  171. runReplayTest(t, cs, walFile, newBlockCh, thisCase, i+1)
  172. }
  173. }
  174. }
  175. //-----------------------------------------------
  176. // Test the log as if we crashed after signing but before writing.
  177. // This relies on privValidator.LastSignature being set
  178. func TestWALCrashBeforeWritePropose(t *testing.T) {
  179. for _, thisCase := range testCases {
  180. lineNum := thisCase.proposeLine
  181. // setup replay test where last message is a proposal
  182. cs, newBlockCh, proposalMsg, walFile := setupReplayTest(thisCase, lineNum, false)
  183. msg := readTimedWALMessage(t, proposalMsg)
  184. proposal := msg.Msg.(msgInfo).Msg.(*ProposalMessage)
  185. // Set LastSig
  186. toPV(cs.privValidator).LastSignBytes = types.SignBytes(cs.state.ChainID, proposal.Proposal)
  187. toPV(cs.privValidator).LastSignature = proposal.Proposal.Signature
  188. runReplayTest(t, cs, walFile, newBlockCh, thisCase, lineNum)
  189. }
  190. }
  191. func TestWALCrashBeforeWritePrevote(t *testing.T) {
  192. for _, thisCase := range testCases {
  193. testReplayCrashBeforeWriteVote(t, thisCase, thisCase.prevoteLine, types.EventStringCompleteProposal())
  194. }
  195. }
  196. func TestWALCrashBeforeWritePrecommit(t *testing.T) {
  197. for _, thisCase := range testCases {
  198. testReplayCrashBeforeWriteVote(t, thisCase, thisCase.precommitLine, types.EventStringPolka())
  199. }
  200. }
  201. func testReplayCrashBeforeWriteVote(t *testing.T, thisCase *testCase, lineNum int, eventString string) {
  202. // setup replay test where last message is a vote
  203. cs, newBlockCh, voteMsg, walFile := setupReplayTest(thisCase, lineNum, false)
  204. types.AddListenerForEvent(cs.evsw, "tester", eventString, func(data types.TMEventData) {
  205. msg := readTimedWALMessage(t, voteMsg)
  206. vote := msg.Msg.(msgInfo).Msg.(*VoteMessage)
  207. // Set LastSig
  208. toPV(cs.privValidator).LastSignBytes = types.SignBytes(cs.state.ChainID, vote.Vote)
  209. toPV(cs.privValidator).LastSignature = vote.Vote.Signature
  210. })
  211. runReplayTest(t, cs, walFile, newBlockCh, thisCase, lineNum)
  212. }
  213. //------------------------------------------------------------------------------------------
  214. // Handshake Tests
  215. var (
  216. NUM_BLOCKS = 6 // number of blocks in the test_data/many_blocks.cswal
  217. mempool = types.MockMempool{}
  218. testPartSize int
  219. )
  220. //---------------------------------------
  // modes enumerates the handshake/replay scenarios that are tested.
  var modes = []uint{
  	0, // all synced up
  	1, // saved block but app and state are behind
  	2, // save block and committed but state is behind
  }
  226. // Sync from scratch
  227. func TestHandshakeReplayAll(t *testing.T) {
  228. for _, m := range modes {
  229. testHandshakeReplay(t, 0, m)
  230. }
  231. }
  232. // Sync many, not from scratch
  233. func TestHandshakeReplaySome(t *testing.T) {
  234. for _, m := range modes {
  235. testHandshakeReplay(t, 1, m)
  236. }
  237. }
  238. // Sync from lagging by one
  239. func TestHandshakeReplayOne(t *testing.T) {
  240. for _, m := range modes {
  241. testHandshakeReplay(t, NUM_BLOCKS-1, m)
  242. }
  243. }
  244. // Sync from caught up
  245. func TestHandshakeReplayNone(t *testing.T) {
  246. for _, m := range modes {
  247. testHandshakeReplay(t, NUM_BLOCKS, m)
  248. }
  249. }
  250. // Make some blocks. Start a fresh app and apply nBlocks blocks. Then restart the app and sync it up with the remaining blocks
  251. func testHandshakeReplay(t *testing.T, nBlocks int, mode uint) {
  252. config := tendermint_test.ResetConfig("proxy_test_")
  253. // copy the many_blocks file
  254. walBody, err := cmn.ReadFile(path.Join(data_dir, "many_blocks.cswal"))
  255. if err != nil {
  256. t.Fatal(err)
  257. }
  258. walFile := writeWAL(string(walBody))
  259. config.Set("cs_wal_file", walFile)
  260. privVal := types.LoadPrivValidator(config.GetString("priv_validator_file"))
  261. testPartSize = config.GetInt("block_part_size")
  262. wal, err := NewWAL(walFile, false)
  263. if err != nil {
  264. t.Fatal(err)
  265. }
  266. chain, commits, err := makeBlockchainFromWAL(wal)
  267. if err != nil {
  268. t.Fatalf(err.Error())
  269. }
  270. state, store := stateAndStore(config, privVal.PubKey)
  271. store.chain = chain
  272. store.commits = commits
  273. // run the chain through state.ApplyBlock to build up the tendermint state
  274. latestAppHash := buildTMStateFromChain(config, state, chain, mode)
  275. // make a new client creator
  276. dummyApp := dummy.NewPersistentDummyApplication(path.Join(config.GetString("db_dir"), "2"))
  277. clientCreator2 := proxy.NewLocalClientCreator(dummyApp)
  278. if nBlocks > 0 {
  279. // run nBlocks against a new client to build up the app state.
  280. // use a throwaway tendermint state
  281. proxyApp := proxy.NewAppConns(config, clientCreator2, nil)
  282. state, _ := stateAndStore(config, privVal.PubKey)
  283. buildAppStateFromChain(proxyApp, state, chain, nBlocks, mode)
  284. }
  285. // now start the app using the handshake - it should sync
  286. handshaker := NewHandshaker(config, state, store)
  287. proxyApp := proxy.NewAppConns(config, clientCreator2, handshaker)
  288. if _, err := proxyApp.Start(); err != nil {
  289. t.Fatalf("Error starting proxy app connections: %v", err)
  290. }
  291. // get the latest app hash from the app
  292. res, err := proxyApp.Query().InfoSync()
  293. if err != nil {
  294. t.Fatal(err)
  295. }
  296. // the app hash should be synced up
  297. if !bytes.Equal(latestAppHash, res.LastBlockAppHash) {
  298. t.Fatalf("Expected app hashes to match after handshake/replay. got %X, expected %X", res.LastBlockAppHash, latestAppHash)
  299. }
  300. expectedBlocksToSync := NUM_BLOCKS - nBlocks
  301. if nBlocks == NUM_BLOCKS && mode > 0 {
  302. expectedBlocksToSync += 1
  303. } else if nBlocks > 0 && mode == 1 {
  304. expectedBlocksToSync += 1
  305. }
  306. if handshaker.NBlocks() != expectedBlocksToSync {
  307. t.Fatalf("Expected handshake to sync %d blocks, got %d", expectedBlocksToSync, handshaker.NBlocks())
  308. }
  309. }
  310. func applyBlock(st *sm.State, blk *types.Block, proxyApp proxy.AppConns) {
  311. err := st.ApplyBlock(nil, proxyApp.Consensus(), blk, blk.MakePartSet(testPartSize).Header(), mempool)
  312. if err != nil {
  313. panic(err)
  314. }
  315. }
  316. func buildAppStateFromChain(proxyApp proxy.AppConns,
  317. state *sm.State, chain []*types.Block, nBlocks int, mode uint) {
  318. // start a new app without handshake, play nBlocks blocks
  319. if _, err := proxyApp.Start(); err != nil {
  320. panic(err)
  321. }
  322. defer proxyApp.Stop()
  323. switch mode {
  324. case 0:
  325. for i := 0; i < nBlocks; i++ {
  326. block := chain[i]
  327. applyBlock(state, block, proxyApp)
  328. }
  329. case 1, 2:
  330. for i := 0; i < nBlocks-1; i++ {
  331. block := chain[i]
  332. applyBlock(state, block, proxyApp)
  333. }
  334. if mode == 2 {
  335. // update the dummy height and apphash
  336. // as if we ran commit but not
  337. applyBlock(state, chain[nBlocks-1], proxyApp)
  338. }
  339. }
  340. }
  341. func buildTMStateFromChain(config cfg.Config, state *sm.State, chain []*types.Block, mode uint) []byte {
  342. // run the whole chain against this client to build up the tendermint state
  343. clientCreator := proxy.NewLocalClientCreator(dummy.NewPersistentDummyApplication(path.Join(config.GetString("db_dir"), "1")))
  344. proxyApp := proxy.NewAppConns(config, clientCreator, nil) // sm.NewHandshaker(config, state, store, ReplayLastBlock))
  345. if _, err := proxyApp.Start(); err != nil {
  346. panic(err)
  347. }
  348. defer proxyApp.Stop()
  349. var latestAppHash []byte
  350. switch mode {
  351. case 0:
  352. // sync right up
  353. for _, block := range chain {
  354. applyBlock(state, block, proxyApp)
  355. }
  356. latestAppHash = state.AppHash
  357. case 1, 2:
  358. // sync up to the penultimate as if we stored the block.
  359. // whether we commit or not depends on the appHash
  360. for _, block := range chain[:len(chain)-1] {
  361. applyBlock(state, block, proxyApp)
  362. }
  363. // apply the final block to a state copy so we can
  364. // get the right next appHash but keep the state back
  365. stateCopy := state.Copy()
  366. applyBlock(stateCopy, chain[len(chain)-1], proxyApp)
  367. latestAppHash = stateCopy.AppHash
  368. }
  369. return latestAppHash
  370. }
  371. //--------------------------
  372. // utils for making blocks
  373. func makeBlockchainFromWAL(wal *WAL) ([]*types.Block, []*types.Commit, error) {
  374. // Search for height marker
  375. gr, found, err := wal.group.Search("#HEIGHT: ", makeHeightSearchFunc(1))
  376. if err != nil {
  377. return nil, nil, err
  378. }
  379. if !found {
  380. return nil, nil, errors.New(cmn.Fmt("WAL does not contain height %d.", 1))
  381. }
  382. defer gr.Close()
  383. log.Notice("Build a blockchain by reading from the WAL")
  384. var blockParts *types.PartSet
  385. var blocks []*types.Block
  386. var commits []*types.Commit
  387. for {
  388. line, err := gr.ReadLine()
  389. if err != nil {
  390. if err == io.EOF {
  391. break
  392. } else {
  393. return nil, nil, err
  394. }
  395. }
  396. piece, err := readPieceFromWAL([]byte(line))
  397. if err != nil {
  398. return nil, nil, err
  399. }
  400. if piece == nil {
  401. continue
  402. }
  403. switch p := piece.(type) {
  404. case *types.PartSetHeader:
  405. // if its not the first one, we have a full block
  406. if blockParts != nil {
  407. var n int
  408. block := wire.ReadBinary(&types.Block{}, blockParts.GetReader(), types.MaxBlockSize, &n, &err).(*types.Block)
  409. blocks = append(blocks, block)
  410. }
  411. blockParts = types.NewPartSetFromHeader(*p)
  412. case *types.Part:
  413. _, err := blockParts.AddPart(p, false)
  414. if err != nil {
  415. return nil, nil, err
  416. }
  417. case *types.Vote:
  418. if p.Type == types.VoteTypePrecommit {
  419. commit := &types.Commit{
  420. BlockID: p.BlockID,
  421. Precommits: []*types.Vote{p},
  422. }
  423. commits = append(commits, commit)
  424. }
  425. }
  426. }
  427. // grab the last block too
  428. var n int
  429. block := wire.ReadBinary(&types.Block{}, blockParts.GetReader(), types.MaxBlockSize, &n, &err).(*types.Block)
  430. blocks = append(blocks, block)
  431. return blocks, commits, nil
  432. }
  433. func readPieceFromWAL(msgBytes []byte) (interface{}, error) {
  434. // Skip over empty and meta lines
  435. if len(msgBytes) == 0 || msgBytes[0] == '#' {
  436. return nil, nil
  437. }
  438. var err error
  439. var msg TimedWALMessage
  440. wire.ReadJSON(&msg, msgBytes, &err)
  441. if err != nil {
  442. fmt.Println("MsgBytes:", msgBytes, string(msgBytes))
  443. return nil, fmt.Errorf("Error reading json data: %v", err)
  444. }
  445. // for logging
  446. switch m := msg.Msg.(type) {
  447. case msgInfo:
  448. switch msg := m.Msg.(type) {
  449. case *ProposalMessage:
  450. return &msg.Proposal.BlockPartsHeader, nil
  451. case *BlockPartMessage:
  452. return msg.Part, nil
  453. case *VoteMessage:
  454. return msg.Vote, nil
  455. }
  456. }
  457. return nil, nil
  458. }
  459. // make some bogus txs
  460. func txsFunc(blockNum int) (txs []types.Tx) {
  461. for i := 0; i < 10; i++ {
  462. txs = append(txs, types.Tx([]byte{byte(blockNum), byte(i)}))
  463. }
  464. return txs
  465. }
  466. // sign a commit vote
  467. func signCommit(chainID string, privVal *types.PrivValidator, height, round int, hash []byte, header types.PartSetHeader) *types.Vote {
  468. vote := &types.Vote{
  469. ValidatorIndex: 0,
  470. ValidatorAddress: privVal.Address,
  471. Height: height,
  472. Round: round,
  473. Type: types.VoteTypePrecommit,
  474. BlockID: types.BlockID{hash, header},
  475. }
  476. sig := privVal.Sign(types.SignBytes(chainID, vote))
  477. vote.Signature = sig
  478. return vote
  479. }
  480. // make a blockchain with one validator
  481. func makeBlockchain(t *testing.T, chainID string, nBlocks int, privVal *types.PrivValidator, proxyApp proxy.AppConns, state *sm.State) (blockchain []*types.Block, commits []*types.Commit) {
  482. prevHash := state.LastBlockID.Hash
  483. lastCommit := new(types.Commit)
  484. prevParts := types.PartSetHeader{}
  485. valHash := state.Validators.Hash()
  486. prevBlockID := types.BlockID{prevHash, prevParts}
  487. for i := 1; i < nBlocks+1; i++ {
  488. block, parts := types.MakeBlock(i, chainID, txsFunc(i), lastCommit,
  489. prevBlockID, valHash, state.AppHash, testPartSize)
  490. fmt.Println(i)
  491. fmt.Println(block.LastBlockID)
  492. err := state.ApplyBlock(nil, proxyApp.Consensus(), block, block.MakePartSet(testPartSize).Header(), mempool)
  493. if err != nil {
  494. t.Fatal(i, err)
  495. }
  496. voteSet := types.NewVoteSet(chainID, i, 0, types.VoteTypePrecommit, state.Validators)
  497. vote := signCommit(chainID, privVal, i, 0, block.Hash(), parts.Header())
  498. _, err = voteSet.AddVote(vote)
  499. if err != nil {
  500. t.Fatal(err)
  501. }
  502. prevHash = block.Hash()
  503. prevParts = parts.Header()
  504. lastCommit = voteSet.MakeCommit()
  505. prevBlockID = types.BlockID{prevHash, prevParts}
  506. blockchain = append(blockchain, block)
  507. commits = append(commits, lastCommit)
  508. }
  509. return blockchain, commits
  510. }
  511. // fresh state and mock store
  512. func stateAndStore(config cfg.Config, pubKey crypto.PubKey) (*sm.State, *mockBlockStore) {
  513. stateDB := dbm.NewMemDB()
  514. return sm.MakeGenesisState(stateDB, &types.GenesisDoc{
  515. ChainID: config.GetString("chain_id"),
  516. Validators: []types.GenesisValidator{
  517. types.GenesisValidator{pubKey, 10000, "test"},
  518. },
  519. AppHash: nil,
  520. }), NewMockBlockStore(config)
  521. }
  522. //----------------------------------
  523. // mock block store
  524. type mockBlockStore struct {
  525. config cfg.Config
  526. chain []*types.Block
  527. commits []*types.Commit
  528. }
  529. // TODO: NewBlockStore(db.NewMemDB) ...
  530. func NewMockBlockStore(config cfg.Config) *mockBlockStore {
  531. return &mockBlockStore{config, nil, nil}
  532. }
  533. func (bs *mockBlockStore) Height() int { return len(bs.chain) }
  534. func (bs *mockBlockStore) LoadBlock(height int) *types.Block { return bs.chain[height-1] }
  535. func (bs *mockBlockStore) LoadBlockMeta(height int) *types.BlockMeta {
  536. block := bs.chain[height-1]
  537. return &types.BlockMeta{
  538. BlockID: types.BlockID{block.Hash(), block.MakePartSet(bs.config.GetInt("block_part_size")).Header()},
  539. Header: block.Header,
  540. }
  541. }
  542. func (bs *mockBlockStore) LoadBlockPart(height int, index int) *types.Part { return nil }
  543. func (bs *mockBlockStore) SaveBlock(block *types.Block, blockParts *types.PartSet, seenCommit *types.Commit) {
  544. }
  545. func (bs *mockBlockStore) LoadBlockCommit(height int) *types.Commit {
  546. return bs.commits[height-1]
  547. }
  548. func (bs *mockBlockStore) LoadSeenCommit(height int) *types.Commit {
  549. return bs.commits[height-1]
  550. }