You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

450 lines
13 KiB

blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. 
* remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
5 years ago
blockchain: Reorg reactor (#3561) * go routines in blockchain reactor * Added reference to the go routine diagram * Initial commit * cleanup * Undo testing_logger change, committed by mistake * Fix the test loggers * pulled some fsm code into pool.go * added pool tests * changes to the design added block requests under peer moved the request trigger in the reactor poolRoutine, triggered now by a ticker in general moved everything required for making block requests smarter in the poolRoutine added a simple map of heights to keep track of what will need to be requested next added a few more tests * send errors to FSM in a different channel than blocks send errors (RemovePeer) from switch on a different channel than the one receiving blocks renamed channels added more pool tests * more pool tests * lint errors * more tests * more tests * switch fast sync to new implementation * fixed data race in tests * cleanup * finished fsm tests * address golangci comments :) * address golangci comments :) * Added timeout on next block needed to advance * updating docs and cleanup * fix issue in test from previous cleanup * cleanup * Added termination scenarios, tests and more cleanup * small fixes to adr, comments and cleanup * Fix bug in sendRequest() If we tried to send a request to a peer not present in the switch, a missing continue statement caused the request to be blackholed in a peer that was removed and never retried. While this bug was manifesting, the reactor kept asking for other blocks that would be stored and never consumed. Added the number of unconsumed blocks in the math for requesting blocks ahead of current processing height so eventually there will be no more blocks requested until the already received ones are consumed. 
* remove bpPeer's didTimeout field * Use distinct err codes for peer timeout and FSM timeouts * Don't allow peers to update with lower height * review comments from Ethan and Zarko * some cleanup, renaming, comments * Move block execution in separate goroutine * Remove pool's numPending * review comments * fix lint, remove old blockchain reactor and duplicates in fsm tests * small reorg around peer after review comments * add the reactor spec * verify block only once * review comments * change to int for max number of pending requests * cleanup and godoc * Add configuration flag fast sync version * golangci fixes * fix config template * move both reactor versions under blockchain * cleanup, golint, renaming stuff * updated documentation, fixed more golint warnings * integrate with behavior package * sync with master * gofmt * add changelog_pending entry * move to improvments * suggestion to changelog entry
5 years ago
  1. package store
  2. import (
  3. "bytes"
  4. "fmt"
  5. "os"
  6. "runtime/debug"
  7. "strings"
  8. "testing"
  9. "time"
  10. "github.com/pkg/errors"
  11. "github.com/stretchr/testify/assert"
  12. "github.com/stretchr/testify/require"
  13. db "github.com/tendermint/tm-db"
  14. dbm "github.com/tendermint/tm-db"
  15. cfg "github.com/tendermint/tendermint/config"
  16. "github.com/tendermint/tendermint/libs/log"
  17. sm "github.com/tendermint/tendermint/state"
  18. "github.com/tendermint/tendermint/types"
  19. tmtime "github.com/tendermint/tendermint/types/time"
  20. )
  // cleanupFunc is a teardown callback returned by test helpers. Calling it
  // removes any config / test files that were created for a particular test.
  type cleanupFunc func()
  24. // make a Commit with a single vote containing just the height and a timestamp
  25. func makeTestCommit(height int64, timestamp time.Time) *types.Commit {
  26. commitSigs := []types.CommitSig{{
  27. BlockIDFlag: types.BlockIDFlagCommit,
  28. ValidatorAddress: []byte("ValidatorAddress"),
  29. Timestamp: timestamp,
  30. Signature: []byte("Signature"),
  31. }}
  32. return types.NewCommit(height, 0, types.BlockID{}, commitSigs)
  33. }
  34. func makeTxs(height int64) (txs []types.Tx) {
  35. for i := 0; i < 10; i++ {
  36. txs = append(txs, types.Tx([]byte{byte(height), byte(i)}))
  37. }
  38. return txs
  39. }
  40. func makeBlock(height int64, state sm.State, lastCommit *types.Commit) *types.Block {
  41. block, _ := state.MakeBlock(height, makeTxs(height), lastCommit, nil, state.Validators.GetProposer().Address)
  42. return block
  43. }
  44. func makeStateAndBlockStore(logger log.Logger) (sm.State, *BlockStore, cleanupFunc) {
  45. config := cfg.ResetTestRoot("blockchain_reactor_test")
  46. // blockDB := dbm.NewDebugDB("blockDB", dbm.NewMemDB())
  47. // stateDB := dbm.NewDebugDB("stateDB", dbm.NewMemDB())
  48. blockDB := dbm.NewMemDB()
  49. stateDB := dbm.NewMemDB()
  50. state, err := sm.LoadStateFromDBOrGenesisFile(stateDB, config.GenesisFile())
  51. if err != nil {
  52. panic(errors.Wrap(err, "error constructing state from genesis file"))
  53. }
  54. return state, NewBlockStore(blockDB), func() { os.RemoveAll(config.RootDir) }
  55. }
  56. func TestLoadBlockStoreStateJSON(t *testing.T) {
  57. db := db.NewMemDB()
  58. bsj := &BlockStoreStateJSON{Height: 1000}
  59. bsj.Save(db)
  60. retrBSJ := LoadBlockStoreStateJSON(db)
  61. assert.Equal(t, *bsj, retrBSJ, "expected the retrieved DBs to match")
  62. }
  63. func TestNewBlockStore(t *testing.T) {
  64. db := db.NewMemDB()
  65. err := db.Set(blockStoreKey, []byte(`{"height": "10000"}`))
  66. require.NoError(t, err)
  67. bs := NewBlockStore(db)
  68. require.Equal(t, int64(10000), bs.Height(), "failed to properly parse blockstore")
  69. panicCausers := []struct {
  70. data []byte
  71. wantErr string
  72. }{
  73. {[]byte("artful-doger"), "not unmarshal bytes"},
  74. {[]byte(" "), "unmarshal bytes"},
  75. }
  76. for i, tt := range panicCausers {
  77. tt := tt
  78. // Expecting a panic here on trying to parse an invalid blockStore
  79. _, _, panicErr := doFn(func() (interface{}, error) {
  80. err := db.Set(blockStoreKey, tt.data)
  81. require.NoError(t, err)
  82. _ = NewBlockStore(db)
  83. return nil, nil
  84. })
  85. require.NotNil(t, panicErr, "#%d panicCauser: %q expected a panic", i, tt.data)
  86. assert.Contains(t, fmt.Sprintf("%#v", panicErr), tt.wantErr, "#%d data: %q", i, tt.data)
  87. }
  88. err = db.Set(blockStoreKey, nil)
  89. require.NoError(t, err)
  90. bs = NewBlockStore(db)
  91. assert.Equal(t, bs.Height(), int64(0), "expecting nil bytes to be unmarshaled alright")
  92. }
  93. func freshBlockStore() (*BlockStore, db.DB) {
  94. db := db.NewMemDB()
  95. return NewBlockStore(db), db
  96. }
  97. var (
  98. state sm.State
  99. block *types.Block
  100. partSet *types.PartSet
  101. part1 *types.Part
  102. part2 *types.Part
  103. seenCommit1 *types.Commit
  104. )
  105. func TestMain(m *testing.M) {
  106. var cleanup cleanupFunc
  107. state, _, cleanup = makeStateAndBlockStore(log.NewTMLogger(new(bytes.Buffer)))
  108. block = makeBlock(1, state, new(types.Commit))
  109. partSet = block.MakePartSet(2)
  110. part1 = partSet.GetPart(0)
  111. part2 = partSet.GetPart(1)
  112. seenCommit1 = makeTestCommit(10, tmtime.Now())
  113. code := m.Run()
  114. cleanup()
  115. os.Exit(code)
  116. }
  117. // TODO: This test should be simplified ...
  118. func TestBlockStoreSaveLoadBlock(t *testing.T) {
  119. state, bs, cleanup := makeStateAndBlockStore(log.NewTMLogger(new(bytes.Buffer)))
  120. defer cleanup()
  121. require.Equal(t, bs.Height(), int64(0), "initially the height should be zero")
  122. // check there are no blocks at various heights
  123. noBlockHeights := []int64{0, -1, 100, 1000, 2}
  124. for i, height := range noBlockHeights {
  125. if g := bs.LoadBlock(height); g != nil {
  126. t.Errorf("#%d: height(%d) got a block; want nil", i, height)
  127. }
  128. }
  129. // save a block
  130. block := makeBlock(bs.Height()+1, state, new(types.Commit))
  131. validPartSet := block.MakePartSet(2)
  132. seenCommit := makeTestCommit(10, tmtime.Now())
  133. bs.SaveBlock(block, partSet, seenCommit)
  134. require.Equal(t, bs.Height(), block.Header.Height, "expecting the new height to be changed")
  135. incompletePartSet := types.NewPartSetFromHeader(types.PartSetHeader{Total: 2})
  136. uncontiguousPartSet := types.NewPartSetFromHeader(types.PartSetHeader{Total: 0})
  137. uncontiguousPartSet.AddPart(part2)
  138. header1 := types.Header{
  139. Height: 1,
  140. ChainID: "block_test",
  141. Time: tmtime.Now(),
  142. }
  143. header2 := header1
  144. header2.Height = 4
  145. // End of setup, test data
  146. commitAtH10 := makeTestCommit(10, tmtime.Now())
  147. tuples := []struct {
  148. block *types.Block
  149. parts *types.PartSet
  150. seenCommit *types.Commit
  151. wantPanic string
  152. wantErr bool
  153. corruptBlockInDB bool
  154. corruptCommitInDB bool
  155. corruptSeenCommitInDB bool
  156. eraseCommitInDB bool
  157. eraseSeenCommitInDB bool
  158. }{
  159. {
  160. block: newBlock(header1, commitAtH10),
  161. parts: validPartSet,
  162. seenCommit: seenCommit1,
  163. },
  164. {
  165. block: nil,
  166. wantPanic: "only save a non-nil block",
  167. },
  168. {
  169. block: newBlock(header2, commitAtH10),
  170. parts: uncontiguousPartSet,
  171. wantPanic: "only save contiguous blocks", // and incomplete and uncontiguous parts
  172. },
  173. {
  174. block: newBlock(header1, commitAtH10),
  175. parts: incompletePartSet,
  176. wantPanic: "only save complete block", // incomplete parts
  177. },
  178. {
  179. block: newBlock(header1, commitAtH10),
  180. parts: validPartSet,
  181. seenCommit: seenCommit1,
  182. corruptCommitInDB: true, // Corrupt the DB's commit entry
  183. wantPanic: "unmarshal to types.Commit failed",
  184. },
  185. {
  186. block: newBlock(header1, commitAtH10),
  187. parts: validPartSet,
  188. seenCommit: seenCommit1,
  189. wantPanic: "unmarshal to types.BlockMeta failed",
  190. corruptBlockInDB: true, // Corrupt the DB's block entry
  191. },
  192. {
  193. block: newBlock(header1, commitAtH10),
  194. parts: validPartSet,
  195. seenCommit: seenCommit1,
  196. // Expecting no error and we want a nil back
  197. eraseSeenCommitInDB: true,
  198. },
  199. {
  200. block: newBlock(header1, commitAtH10),
  201. parts: validPartSet,
  202. seenCommit: seenCommit1,
  203. corruptSeenCommitInDB: true,
  204. wantPanic: "unmarshal to types.Commit failed",
  205. },
  206. {
  207. block: newBlock(header1, commitAtH10),
  208. parts: validPartSet,
  209. seenCommit: seenCommit1,
  210. // Expecting no error and we want a nil back
  211. eraseCommitInDB: true,
  212. },
  213. }
  214. type quad struct {
  215. block *types.Block
  216. commit *types.Commit
  217. meta *types.BlockMeta
  218. seenCommit *types.Commit
  219. }
  220. for i, tuple := range tuples {
  221. tuple := tuple
  222. bs, db := freshBlockStore()
  223. // SaveBlock
  224. res, err, panicErr := doFn(func() (interface{}, error) {
  225. bs.SaveBlock(tuple.block, tuple.parts, tuple.seenCommit)
  226. if tuple.block == nil {
  227. return nil, nil
  228. }
  229. if tuple.corruptBlockInDB {
  230. err := db.Set(calcBlockMetaKey(tuple.block.Height), []byte("block-bogus"))
  231. require.NoError(t, err)
  232. }
  233. bBlock := bs.LoadBlock(tuple.block.Height)
  234. bBlockMeta := bs.LoadBlockMeta(tuple.block.Height)
  235. if tuple.eraseSeenCommitInDB {
  236. db.Delete(calcSeenCommitKey(tuple.block.Height))
  237. }
  238. if tuple.corruptSeenCommitInDB {
  239. err := db.Set(calcSeenCommitKey(tuple.block.Height), []byte("bogus-seen-commit"))
  240. require.NoError(t, err)
  241. }
  242. bSeenCommit := bs.LoadSeenCommit(tuple.block.Height)
  243. commitHeight := tuple.block.Height - 1
  244. if tuple.eraseCommitInDB {
  245. db.Delete(calcBlockCommitKey(commitHeight))
  246. }
  247. if tuple.corruptCommitInDB {
  248. err := db.Set(calcBlockCommitKey(commitHeight), []byte("foo-bogus"))
  249. require.NoError(t, err)
  250. }
  251. bCommit := bs.LoadBlockCommit(commitHeight)
  252. return &quad{block: bBlock, seenCommit: bSeenCommit, commit: bCommit,
  253. meta: bBlockMeta}, nil
  254. })
  255. if subStr := tuple.wantPanic; subStr != "" {
  256. if panicErr == nil {
  257. t.Errorf("#%d: want a non-nil panic", i)
  258. } else if got := fmt.Sprintf("%#v", panicErr); !strings.Contains(got, subStr) {
  259. t.Errorf("#%d:\n\tgotErr: %q\nwant substring: %q", i, got, subStr)
  260. }
  261. continue
  262. }
  263. if tuple.wantErr {
  264. if err == nil {
  265. t.Errorf("#%d: got nil error", i)
  266. }
  267. continue
  268. }
  269. assert.Nil(t, panicErr, "#%d: unexpected panic", i)
  270. assert.Nil(t, err, "#%d: expecting a non-nil error", i)
  271. qua, ok := res.(*quad)
  272. if !ok || qua == nil {
  273. t.Errorf("#%d: got nil quad back; gotType=%T", i, res)
  274. continue
  275. }
  276. if tuple.eraseSeenCommitInDB {
  277. assert.Nil(t, qua.seenCommit,
  278. "erased the seenCommit in the DB hence we should get back a nil seenCommit")
  279. }
  280. if tuple.eraseCommitInDB {
  281. assert.Nil(t, qua.commit,
  282. "erased the commit in the DB hence we should get back a nil commit")
  283. }
  284. }
  285. }
  286. func TestLoadBlockPart(t *testing.T) {
  287. bs, db := freshBlockStore()
  288. height, index := int64(10), 1
  289. loadPart := func() (interface{}, error) {
  290. part := bs.LoadBlockPart(height, index)
  291. return part, nil
  292. }
  293. // Initially no contents.
  294. // 1. Requesting for a non-existent block shouldn't fail
  295. res, _, panicErr := doFn(loadPart)
  296. require.Nil(t, panicErr, "a non-existent block part shouldn't cause a panic")
  297. require.Nil(t, res, "a non-existent block part should return nil")
  298. // 2. Next save a corrupted block then try to load it
  299. err := db.Set(calcBlockPartKey(height, index), []byte("Tendermint"))
  300. require.NoError(t, err)
  301. res, _, panicErr = doFn(loadPart)
  302. require.NotNil(t, panicErr, "expecting a non-nil panic")
  303. require.Contains(t, panicErr.Error(), "unmarshal to types.Part failed")
  304. // 3. A good block serialized and saved to the DB should be retrievable
  305. err = db.Set(calcBlockPartKey(height, index), cdc.MustMarshalBinaryBare(part1))
  306. require.NoError(t, err)
  307. gotPart, _, panicErr := doFn(loadPart)
  308. require.Nil(t, panicErr, "an existent and proper block should not panic")
  309. require.Nil(t, res, "a properly saved block should return a proper block")
  310. require.Equal(t, gotPart.(*types.Part), part1,
  311. "expecting successful retrieval of previously saved block")
  312. }
  313. func TestLoadBlockMeta(t *testing.T) {
  314. bs, db := freshBlockStore()
  315. height := int64(10)
  316. loadMeta := func() (interface{}, error) {
  317. meta := bs.LoadBlockMeta(height)
  318. return meta, nil
  319. }
  320. // Initially no contents.
  321. // 1. Requesting for a non-existent blockMeta shouldn't fail
  322. res, _, panicErr := doFn(loadMeta)
  323. require.Nil(t, panicErr, "a non-existent blockMeta shouldn't cause a panic")
  324. require.Nil(t, res, "a non-existent blockMeta should return nil")
  325. // 2. Next save a corrupted blockMeta then try to load it
  326. err := db.Set(calcBlockMetaKey(height), []byte("Tendermint-Meta"))
  327. require.NoError(t, err)
  328. res, _, panicErr = doFn(loadMeta)
  329. require.NotNil(t, panicErr, "expecting a non-nil panic")
  330. require.Contains(t, panicErr.Error(), "unmarshal to types.BlockMeta")
  331. // 3. A good blockMeta serialized and saved to the DB should be retrievable
  332. meta := &types.BlockMeta{}
  333. err = db.Set(calcBlockMetaKey(height), cdc.MustMarshalBinaryBare(meta))
  334. require.NoError(t, err)
  335. gotMeta, _, panicErr := doFn(loadMeta)
  336. require.Nil(t, panicErr, "an existent and proper block should not panic")
  337. require.Nil(t, res, "a properly saved blockMeta should return a proper blocMeta ")
  338. require.Equal(t, cdc.MustMarshalBinaryBare(meta), cdc.MustMarshalBinaryBare(gotMeta),
  339. "expecting successful retrieval of previously saved blockMeta")
  340. }
  341. func TestBlockFetchAtHeight(t *testing.T) {
  342. state, bs, cleanup := makeStateAndBlockStore(log.NewTMLogger(new(bytes.Buffer)))
  343. defer cleanup()
  344. require.Equal(t, bs.Height(), int64(0), "initially the height should be zero")
  345. block := makeBlock(bs.Height()+1, state, new(types.Commit))
  346. partSet := block.MakePartSet(2)
  347. seenCommit := makeTestCommit(10, tmtime.Now())
  348. bs.SaveBlock(block, partSet, seenCommit)
  349. require.Equal(t, bs.Height(), block.Header.Height, "expecting the new height to be changed")
  350. blockAtHeight := bs.LoadBlock(bs.Height())
  351. bz1 := cdc.MustMarshalBinaryBare(block)
  352. bz2 := cdc.MustMarshalBinaryBare(blockAtHeight)
  353. require.Equal(t, bz1, bz2)
  354. require.Equal(t, block.Hash(), blockAtHeight.Hash(),
  355. "expecting a successful load of the last saved block")
  356. blockAtHeightPlus1 := bs.LoadBlock(bs.Height() + 1)
  357. require.Nil(t, blockAtHeightPlus1, "expecting an unsuccessful load of Height()+1")
  358. blockAtHeightPlus2 := bs.LoadBlock(bs.Height() + 2)
  359. require.Nil(t, blockAtHeightPlus2, "expecting an unsuccessful load of Height()+2")
  360. }
  // doFn runs fn and converts any panic it raises into an error.
  //
  // On a normal return, res and err are fn's results and panicErr is nil. If fn
  // panics, res and err keep their zero values and panicErr describes the panic:
  // an error panic value is returned as is, a string or fmt.Stringer value
  // becomes the error text, and any other value is formatted with %v followed
  // by the stack trace captured while recovering.
  func doFn(fn func() (interface{}, error)) (res interface{}, err error, panicErr error) {
  	defer func() {
  		r := recover()
  		if r == nil {
  			return
  		}
  		switch e := r.(type) {
  		case error:
  			panicErr = e
  		case string:
  			panicErr = fmt.Errorf("%s", e)
  		case fmt.Stringer:
  			panicErr = fmt.Errorf("%s", e)
  		default:
  			// Keep the panic value itself; a bare stack trace would hide it.
  			panicErr = fmt.Errorf("%v\n%s", r, debug.Stack())
  		}
  	}()

  	res, err = fn()
  	return res, err, panicErr
  }
  381. func newBlock(hdr types.Header, lastCommit *types.Commit) *types.Block {
  382. return &types.Block{
  383. Header: hdr,
  384. LastCommit: lastCommit,
  385. }
  386. }