You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1976 lines
62 KiB

  1. package consensus
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "io/ioutil"
  7. "os"
  8. "reflect"
  9. "runtime/debug"
  10. "sync"
  11. "time"
  12. "github.com/gogo/protobuf/proto"
  13. cfg "github.com/tendermint/tendermint/config"
  14. cstypes "github.com/tendermint/tendermint/consensus/types"
  15. "github.com/tendermint/tendermint/crypto"
  16. tmevents "github.com/tendermint/tendermint/libs/events"
  17. "github.com/tendermint/tendermint/libs/fail"
  18. tmjson "github.com/tendermint/tendermint/libs/json"
  19. "github.com/tendermint/tendermint/libs/log"
  20. tmmath "github.com/tendermint/tendermint/libs/math"
  21. tmos "github.com/tendermint/tendermint/libs/os"
  22. "github.com/tendermint/tendermint/libs/service"
  23. "github.com/tendermint/tendermint/p2p"
  24. tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
  25. sm "github.com/tendermint/tendermint/state"
  26. "github.com/tendermint/tendermint/types"
  27. tmtime "github.com/tendermint/tendermint/types/time"
  28. )
// State handles execution of the consensus algorithm.
// It processes votes and proposals, and upon reaching agreement,
// commits blocks to the chain and executes them against the application.
// The internal state machine receives input from peers, the internal validator, and from a timer.
//
// In addition to the regular consensus fields, this State carries a table of
// per-height misbehaviors and a handle to the p2p switch; see the last two
// fields.
type State struct {
	service.BaseService

	// config details
	config        *cfg.ConsensusConfig
	privValidator types.PrivValidator // for signing votes

	// store blocks and commits
	blockStore sm.BlockStore

	// create and execute blocks
	blockExec *sm.BlockExecutor

	// notify us if txs are available
	txNotifier txNotifier

	// add evidence to the pool
	// when it's detected
	evpool evidencePool

	// internal state
	// mtx guards the embedded RoundState and the chain state below.
	mtx sync.RWMutex
	cstypes.RoundState
	state sm.State // State until height-1.

	// state changes may be triggered by: msgs from peers,
	// msgs from ourself, or by timeouts
	peerMsgQueue     chan msgInfo
	internalMsgQueue chan msgInfo
	timeoutTicker    TimeoutTicker

	// privValidator pubkey, memoized for the duration of one block
	// to avoid extra requests to HSM
	privValidatorPubKey crypto.PubKey

	// information about added votes and block parts is written on this channel
	// so statistics can be computed by the reactor
	statsMsgQueue chan msgInfo

	// we use eventBus to trigger msg broadcasts in the reactor,
	// and to notify external subscribers, eg. through a websocket
	eventBus *types.EventBus

	// a Write-Ahead Log ensures we can recover from any kind of crash
	// and helps us avoid signing conflicting votes
	wal          WAL
	replayMode   bool // so we don't log signing errors during replay
	doWALCatchup bool // determines if we even try to do the catchup

	// for tests where we want to limit the number of transitions the state makes
	nSteps int

	// some functions can be overwritten for testing
	decideProposal func(height int64, round int32)

	// closed when we finish shutting down
	done chan struct{}

	// synchronous pubsub between consensus state and reactor.
	// state only emits EventNewRoundStep and EventVote
	evsw tmevents.EventSwitch

	// for reporting metrics
	metrics *Metrics

	// misbehaviors mapped for each height (can't have more than one misbehavior per height)
	misbehaviors map[int64]Misbehavior

	// the switch is passed to the state so that maverick misbehaviors can directly control which
	// information they send to which nodes
	sw *p2p.Switch
}
// StateOption sets an optional parameter on the State.
// It is a function that receives the State being built and mutates it in place.
type StateOption func(*State)
  89. // NewState returns a new State.
  90. func NewState(
  91. config *cfg.ConsensusConfig,
  92. state sm.State,
  93. blockExec *sm.BlockExecutor,
  94. blockStore sm.BlockStore,
  95. txNotifier txNotifier,
  96. evpool evidencePool,
  97. misbehaviors map[int64]Misbehavior,
  98. options ...StateOption,
  99. ) *State {
  100. cs := &State{
  101. config: config,
  102. blockExec: blockExec,
  103. blockStore: blockStore,
  104. txNotifier: txNotifier,
  105. peerMsgQueue: make(chan msgInfo, msgQueueSize),
  106. internalMsgQueue: make(chan msgInfo, msgQueueSize),
  107. timeoutTicker: NewTimeoutTicker(),
  108. statsMsgQueue: make(chan msgInfo, msgQueueSize),
  109. done: make(chan struct{}),
  110. doWALCatchup: true,
  111. wal: nilWAL{},
  112. evpool: evpool,
  113. evsw: tmevents.NewEventSwitch(),
  114. metrics: NopMetrics(),
  115. misbehaviors: misbehaviors,
  116. }
  117. // set function defaults (may be overwritten before calling Start)
  118. cs.decideProposal = cs.defaultDecideProposal
  119. // We have no votes, so reconstruct LastCommit from SeenCommit.
  120. if state.LastBlockHeight > 0 {
  121. cs.reconstructLastCommit(state)
  122. }
  123. cs.updateToState(state)
  124. // Don't call scheduleRound0 yet.
  125. // We do that upon Start().
  126. cs.BaseService = *service.NewBaseService(nil, "State", cs)
  127. for _, option := range options {
  128. option(cs)
  129. }
  130. return cs
  131. }
// SetSwitch stores the given p2p switch on the State.
// I know this is not great but the maverick consensus state needs access to the peers
func (cs *State) SetSwitch(sw *p2p.Switch) {
	cs.sw = sw
}
  136. // state transitions on complete-proposal, 2/3-any, 2/3-one
  137. func (cs *State) handleMsg(mi msgInfo) {
  138. cs.mtx.Lock()
  139. defer cs.mtx.Unlock()
  140. var (
  141. added bool
  142. err error
  143. )
  144. msg, peerID := mi.Msg, mi.PeerID
  145. switch msg := msg.(type) {
  146. case *ProposalMessage:
  147. // will not cause transition.
  148. // once proposal is set, we can receive block parts
  149. // err = cs.setProposal(msg.Proposal)
  150. if b, ok := cs.misbehaviors[cs.Height]; ok {
  151. err = b.ReceiveProposal(cs, msg.Proposal)
  152. } else {
  153. err = defaultReceiveProposal(cs, msg.Proposal)
  154. }
  155. case *BlockPartMessage:
  156. // if the proposal is complete, we'll enterPrevote or tryFinalizeCommit
  157. added, err = cs.addProposalBlockPart(msg, peerID)
  158. if added {
  159. cs.statsMsgQueue <- mi
  160. }
  161. if err != nil && msg.Round != cs.Round {
  162. cs.Logger.Debug(
  163. "Received block part from wrong round",
  164. "height",
  165. cs.Height,
  166. "csRound",
  167. cs.Round,
  168. "blockRound",
  169. msg.Round)
  170. err = nil
  171. }
  172. case *VoteMessage:
  173. // attempt to add the vote and dupeout the validator if its a duplicate signature
  174. // if the vote gives us a 2/3-any or 2/3-one, we transition
  175. added, err = cs.tryAddVote(msg.Vote, peerID)
  176. if added {
  177. cs.statsMsgQueue <- mi
  178. }
  179. // if err == ErrAddingVote {
  180. // TODO: punish peer
  181. // We probably don't want to stop the peer here. The vote does not
  182. // necessarily comes from a malicious peer but can be just broadcasted by
  183. // a typical peer.
  184. // https://github.com/tendermint/tendermint/issues/1281
  185. // }
  186. // NOTE: the vote is broadcast to peers by the reactor listening
  187. // for vote events
  188. // TODO: If rs.Height == vote.Height && rs.Round < vote.Round,
  189. // the peer is sending us CatchupCommit precommits.
  190. // We could make note of this and help filter in broadcastHasVoteMessage().
  191. default:
  192. cs.Logger.Error("Unknown msg type", "type", reflect.TypeOf(msg))
  193. return
  194. }
  195. if err != nil {
  196. cs.Logger.Error("Error with msg", "height", cs.Height, "round", cs.Round,
  197. "peer", peerID, "err", err, "msg", msg)
  198. }
  199. }
  200. // Enter (CreateEmptyBlocks): from enterNewRound(height,round)
  201. // Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ):
  202. // after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval
  203. // Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool
  204. func (cs *State) enterPropose(height int64, round int32) {
  205. logger := cs.Logger.With("height", height, "round", round)
  206. if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) {
  207. logger.Debug(fmt.Sprintf(
  208. "enterPropose(%v/%v): Invalid args. Current step: %v/%v/%v",
  209. height,
  210. round,
  211. cs.Height,
  212. cs.Round,
  213. cs.Step))
  214. return
  215. }
  216. logger.Info(fmt.Sprintf("enterPropose(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  217. defer func() {
  218. // Done enterPropose:
  219. cs.updateRoundStep(round, cstypes.RoundStepPropose)
  220. cs.newStep()
  221. // If we have the whole proposal + POL, then goto Prevote now.
  222. // else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart),
  223. // or else after timeoutPropose
  224. if cs.isProposalComplete() {
  225. cs.enterPrevote(height, cs.Round)
  226. }
  227. }()
  228. if b, ok := cs.misbehaviors[cs.Height]; ok {
  229. b.EnterPropose(cs, height, round)
  230. } else {
  231. defaultEnterPropose(cs, height, round)
  232. }
  233. }
  234. // Enter: `timeoutPropose` after entering Propose.
  235. // Enter: proposal block and POL is ready.
  236. // Prevote for LockedBlock if we're locked, or ProposalBlock if valid.
  237. // Otherwise vote nil.
  238. func (cs *State) enterPrevote(height int64, round int32) {
  239. if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) {
  240. cs.Logger.Debug(fmt.Sprintf(
  241. "enterPrevote(%v/%v): Invalid args. Current step: %v/%v/%v",
  242. height,
  243. round,
  244. cs.Height,
  245. cs.Round,
  246. cs.Step))
  247. return
  248. }
  249. defer func() {
  250. // Done enterPrevote:
  251. cs.updateRoundStep(round, cstypes.RoundStepPrevote)
  252. cs.newStep()
  253. }()
  254. cs.Logger.Info(fmt.Sprintf("enterPrevote(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  255. // Sign and broadcast vote as necessary
  256. if b, ok := cs.misbehaviors[cs.Height]; ok {
  257. b.EnterPrevote(cs, height, round)
  258. } else {
  259. defaultEnterPrevote(cs, height, round)
  260. }
  261. // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait
  262. // (so we have more time to try and collect +2/3 prevotes for a single block)
  263. }
  264. // Enter: `timeoutPrevote` after any +2/3 prevotes.
  265. // Enter: `timeoutPrecommit` after any +2/3 precommits.
  266. // Enter: +2/3 precomits for block or nil.
  267. // Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round)
  268. // else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil,
  269. // else, precommit nil otherwise.
  270. func (cs *State) enterPrecommit(height int64, round int32) {
  271. logger := cs.Logger.With("height", height, "round", round)
  272. if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) {
  273. logger.Debug(fmt.Sprintf(
  274. "enterPrecommit(%v/%v): Invalid args. Current step: %v/%v/%v",
  275. height,
  276. round,
  277. cs.Height,
  278. cs.Round,
  279. cs.Step))
  280. return
  281. }
  282. logger.Info(fmt.Sprintf("enterPrecommit(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  283. defer func() {
  284. // Done enterPrecommit:
  285. cs.updateRoundStep(round, cstypes.RoundStepPrecommit)
  286. cs.newStep()
  287. }()
  288. if b, ok := cs.misbehaviors[cs.Height]; ok {
  289. b.EnterPrecommit(cs, height, round)
  290. } else {
  291. defaultEnterPrecommit(cs, height, round)
  292. }
  293. }
  294. func (cs *State) addVote(
  295. vote *types.Vote,
  296. peerID p2p.NodeID) (added bool, err error) {
  297. cs.Logger.Debug(
  298. "addVote",
  299. "voteHeight",
  300. vote.Height,
  301. "voteType",
  302. vote.Type,
  303. "valIndex",
  304. vote.ValidatorIndex,
  305. "csHeight",
  306. cs.Height,
  307. )
  308. // A precommit for the previous height?
  309. // These come in while we wait timeoutCommit
  310. if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType {
  311. if cs.Step != cstypes.RoundStepNewHeight {
  312. // Late precommit at prior height is ignored
  313. cs.Logger.Debug("Precommit vote came in after commit timeout and has been ignored", "vote", vote)
  314. return
  315. }
  316. added, err = cs.LastCommit.AddVote(vote)
  317. if !added {
  318. return
  319. }
  320. cs.Logger.Info(fmt.Sprintf("Added to lastPrecommits: %v", cs.LastCommit.StringShort()))
  321. _ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote})
  322. cs.evsw.FireEvent(types.EventVote, vote)
  323. // if we can skip timeoutCommit and have all the votes now,
  324. if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() {
  325. // go straight to new round (skip timeout commit)
  326. // cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight)
  327. cs.enterNewRound(cs.Height, 0)
  328. }
  329. return
  330. }
  331. // Height mismatch is ignored.
  332. // Not necessarily a bad peer, but not favourable behaviour.
  333. if vote.Height != cs.Height {
  334. cs.Logger.Info("Vote ignored and not added", "voteHeight", vote.Height, "csHeight", cs.Height, "peerID", peerID)
  335. return
  336. }
  337. added, err = cs.Votes.AddVote(vote, peerID)
  338. if !added {
  339. // Either duplicate, or error upon cs.Votes.AddByIndex()
  340. return
  341. }
  342. _ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote})
  343. cs.evsw.FireEvent(types.EventVote, vote)
  344. switch vote.Type {
  345. case tmproto.PrevoteType:
  346. if b, ok := cs.misbehaviors[cs.Height]; ok {
  347. b.ReceivePrevote(cs, vote)
  348. } else {
  349. defaultReceivePrevote(cs, vote)
  350. }
  351. case tmproto.PrecommitType:
  352. if b, ok := cs.misbehaviors[cs.Height]; ok {
  353. b.ReceivePrecommit(cs, vote)
  354. }
  355. defaultReceivePrecommit(cs, vote)
  356. default:
  357. panic(fmt.Sprintf("Unexpected vote type %v", vote.Type))
  358. }
  359. return added, err
  360. }
  361. //-----------------------------------------------------------------------------
  362. // Errors
// Sentinel errors of the consensus state. The exported ones are part of the
// package API; errPubKeyIsNotSet is internal and its message points at the
// "Can't get private validator pubkey" log line.
var (
	ErrInvalidProposalSignature   = errors.New("error invalid proposal signature")
	ErrInvalidProposalPOLRound    = errors.New("error invalid proposal POL round")
	ErrAddingVote                 = errors.New("error adding vote")
	ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key")

	errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors")
)
  370. //-----------------------------------------------------------------------------
var (
	// msgQueueSize is the buffer capacity NewState gives to the peer,
	// internal and stats message queues.
	msgQueueSize = 1000
)
// msgInfo wraps msgs from the reactor which may update the state.
// It pairs the message with the ID of the peer it came from; messages queued
// with an empty PeerID are treated as internal (see AddVote and friends).
type msgInfo struct {
	Msg    Message    `json:"msg"`
	PeerID p2p.NodeID `json:"peer_key"` // serialized under the key "peer_key"
}
// timeoutInfo describes internally generated messages which may update the state.
// It records how long the timeout lasts and the height/round/step it was
// scheduled for.
type timeoutInfo struct {
	Duration time.Duration         `json:"duration"`
	Height   int64                 `json:"height"`
	Round    int32                 `json:"round"`
	Step     cstypes.RoundStepType `json:"step"`
}
  386. func (ti *timeoutInfo) String() string {
  387. return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step)
  388. }
// txNotifier is the interface to the mempool.
type txNotifier interface {
	// TxsAvailable returns a receive-only channel of empty structs.
	// NOTE(review): presumably signalled when the mempool has transactions
	// available — confirm against the mempool implementation.
	TxsAvailable() <-chan struct{}
}
// evidencePool is the interface to the evidence pool.
type evidencePool interface {
	// AddEvidenceFromConsensus adds consensus based evidence to the evidence
	// pool where time is the time of the block where the offense occurred and
	// the validator set is the current one.
	AddEvidenceFromConsensus(evidence types.Evidence) error
}
  399. //----------------------------------------
  400. // Public interface
  401. // SetLogger implements Service.
  402. func (cs *State) SetLogger(l log.Logger) {
  403. cs.BaseService.Logger = l
  404. cs.timeoutTicker.SetLogger(l)
  405. }
// SetEventBus sets event bus.
// The same bus is stored on the State and passed on to the block executor.
func (cs *State) SetEventBus(b *types.EventBus) {
	cs.eventBus = b
	cs.blockExec.SetEventBus(b)
}
  411. // StateMetrics sets the metrics.
  412. func StateMetrics(metrics *Metrics) StateOption {
  413. return func(cs *State) { cs.metrics = metrics }
  414. }
// String returns the fixed name "ConsensusState".
// It deliberately reads no fields of the State.
func (cs *State) String() string {
	// better not to access shared variables
	return "ConsensusState"
}
  420. // GetState returns a copy of the chain state.
  421. func (cs *State) GetState() sm.State {
  422. cs.mtx.RLock()
  423. defer cs.mtx.RUnlock()
  424. return cs.state.Copy()
  425. }
  426. // GetLastHeight returns the last height committed.
  427. // If there were no blocks, returns 0.
  428. func (cs *State) GetLastHeight() int64 {
  429. cs.mtx.RLock()
  430. defer cs.mtx.RUnlock()
  431. return cs.RoundState.Height - 1
  432. }
  433. // GetRoundState returns a shallow copy of the internal consensus state.
  434. func (cs *State) GetRoundState() *cstypes.RoundState {
  435. cs.mtx.RLock()
  436. rs := cs.RoundState // copy
  437. cs.mtx.RUnlock()
  438. return &rs
  439. }
  440. // GetRoundStateJSON returns a json of RoundState.
  441. func (cs *State) GetRoundStateJSON() ([]byte, error) {
  442. cs.mtx.RLock()
  443. defer cs.mtx.RUnlock()
  444. return tmjson.Marshal(cs.RoundState)
  445. }
  446. // GetRoundStateSimpleJSON returns a json of RoundStateSimple
  447. func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) {
  448. cs.mtx.RLock()
  449. defer cs.mtx.RUnlock()
  450. return tmjson.Marshal(cs.RoundState.RoundStateSimple())
  451. }
  452. // GetValidators returns a copy of the current validators.
  453. func (cs *State) GetValidators() (int64, []*types.Validator) {
  454. cs.mtx.RLock()
  455. defer cs.mtx.RUnlock()
  456. return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators
  457. }
  458. // SetPrivValidator sets the private validator account for signing votes. It
  459. // immediately requests pubkey and caches it.
  460. func (cs *State) SetPrivValidator(priv types.PrivValidator) {
  461. cs.mtx.Lock()
  462. defer cs.mtx.Unlock()
  463. cs.privValidator = priv
  464. if err := cs.updatePrivValidatorPubKey(); err != nil {
  465. cs.Logger.Error("Can't get private validator pubkey", "err", err)
  466. }
  467. }
  468. // SetTimeoutTicker sets the local timer. It may be useful to overwrite for testing.
  469. func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) {
  470. cs.mtx.Lock()
  471. cs.timeoutTicker = timeoutTicker
  472. cs.mtx.Unlock()
  473. }
  474. // LoadCommit loads the commit for a given height.
  475. func (cs *State) LoadCommit(height int64) *types.Commit {
  476. cs.mtx.RLock()
  477. defer cs.mtx.RUnlock()
  478. if height == cs.blockStore.Height() {
  479. return cs.blockStore.LoadSeenCommit(height)
  480. }
  481. return cs.blockStore.LoadBlockCommit(height)
  482. }
// OnStart loads the latest state via the WAL, and starts the timeout and
// receive routines.
//
// In order, it:
//  1. opens the WAL file if the WAL is still the nilWAL placeholder;
//  2. if doWALCatchup is set, replays the WAL for the current height,
//     attempting a single repair when the replay reports data corruption;
//  3. starts the event switch and the timeout ticker;
//  4. runs the double-signing risk check for the current height;
//  5. launches receiveRoutine in its own goroutine and schedules round 0.
//
// Any error from steps 1, 3 and 4, and a failed repair in step 2, aborts the
// start and is returned. A non-corruption replay error is only logged.
func (cs *State) OnStart() error {
	// We may set the WAL in testing before calling Start, so only OpenWAL if its
	// still the nilWAL.
	if _, ok := cs.wal.(nilWAL); ok {
		if err := cs.loadWalFile(); err != nil {
			return err
		}
	}

	// We may have lost some votes if the process crashed reload from consensus
	// log to catchup.
	if cs.doWALCatchup {
		// Guards against repairing more than once: a second corruption error
		// after a repair is returned to the caller.
		repairAttempted := false

	LOOP:
		for {
			err := cs.catchupReplay(cs.Height)
			switch {
			case err == nil:
				// Replay succeeded.
				break LOOP
			case !IsDataCorruptionError(err):
				// Not a corruption problem: log it and start anyway.
				cs.Logger.Error("Error on catchup replay. Proceeding to start State anyway", "err", err)
				break LOOP
			case repairAttempted:
				// Still corrupted after one repair: give up.
				return err
			}

			cs.Logger.Info("WAL file is corrupted. Attempting repair", "err", err)

			// 1) prep work
			if err := cs.wal.Stop(); err != nil {
				return err
			}

			repairAttempted = true

			// 2) backup original WAL file
			corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile())
			if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil {
				return err
			}
			cs.Logger.Info("Backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile)

			// 3) try to repair (WAL file will be overwritten!)
			if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil {
				cs.Logger.Error("Repair failed", "err", err)
				return err
			}
			cs.Logger.Info("Successful repair")

			// reload WAL file
			// (the loop then retries catchupReplay against the repaired file)
			if err := cs.loadWalFile(); err != nil {
				return err
			}
		}
	}

	if err := cs.evsw.Start(); err != nil {
		return err
	}

	// we need the timeoutRoutine for replay so
	// we don't block on the tick chan.
	// NOTE: we will get a build up of garbage go routines
	// firing on the tockChan until the receiveRoutine is started
	// to deal with them (by that point, at most one will be valid)
	if err := cs.timeoutTicker.Start(); err != nil {
		return err
	}

	// Double Signing Risk Reduction
	if err := cs.checkDoubleSigningRisk(cs.Height); err != nil {
		return err
	}

	// now start the receiveRoutine
	// (0 means: no limit on the number of messages processed)
	go cs.receiveRoutine(0)

	// schedule the first round!
	// use GetRoundState so we don't race the receiveRoutine for access
	cs.scheduleRound0(cs.GetRoundState())

	return nil
}
  555. // loadWalFile loads WAL data from file. It overwrites cs.wal.
  556. func (cs *State) loadWalFile() error {
  557. wal, err := cs.OpenWAL(cs.config.WalFile())
  558. if err != nil {
  559. cs.Logger.Error("Error loading State wal", "err", err)
  560. return err
  561. }
  562. cs.wal = wal
  563. return nil
  564. }
  565. // OnStop implements service.Service.
  566. func (cs *State) OnStop() {
  567. if err := cs.evsw.Stop(); err != nil {
  568. cs.Logger.Error("error trying to stop eventSwitch", "error", err)
  569. }
  570. if err := cs.timeoutTicker.Stop(); err != nil {
  571. cs.Logger.Error("error trying to stop timeoutTicket", "error", err)
  572. }
  573. // WAL is stopped in receiveRoutine.
  574. }
// Wait waits for the main routine to return.
// It blocks until the done channel is closed.
// NOTE: be sure to Stop() the event switch and drain
// any event channels or this may deadlock
func (cs *State) Wait() {
	<-cs.done
}
  581. // OpenWAL opens a file to log all consensus messages and timeouts for
  582. // deterministic accountability.
  583. func (cs *State) OpenWAL(walFile string) (WAL, error) {
  584. wal, err := NewWAL(walFile)
  585. if err != nil {
  586. cs.Logger.Error("Failed to open WAL", "file", walFile, "err", err)
  587. return nil, err
  588. }
  589. wal.SetLogger(cs.Logger.With("wal", walFile))
  590. if err := wal.Start(); err != nil {
  591. cs.Logger.Error("Failed to start WAL", "err", err)
  592. return nil, err
  593. }
  594. return wal, nil
  595. }
  596. //------------------------------------------------------------
  597. // Public interface for passing messages into the consensus state, possibly causing a state transition.
  598. // If peerID == "", the msg is considered internal.
  599. // Messages are added to the appropriate queue (peer or internal).
  600. // If the queue is full, the function may block.
  601. // TODO: should these return anything or let callers just use events?
  602. // AddVote inputs a vote.
  603. func (cs *State) AddVote(vote *types.Vote, peerID p2p.NodeID) (added bool, err error) {
  604. if peerID == "" {
  605. cs.internalMsgQueue <- msgInfo{&VoteMessage{vote}, ""}
  606. } else {
  607. cs.peerMsgQueue <- msgInfo{&VoteMessage{vote}, peerID}
  608. }
  609. // TODO: wait for event?!
  610. return false, nil
  611. }
  612. // SetProposal inputs a proposal.
  613. func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.NodeID) error {
  614. if peerID == "" {
  615. cs.internalMsgQueue <- msgInfo{&ProposalMessage{proposal}, ""}
  616. } else {
  617. cs.peerMsgQueue <- msgInfo{&ProposalMessage{proposal}, peerID}
  618. }
  619. // TODO: wait for event?!
  620. return nil
  621. }
  622. // AddProposalBlockPart inputs a part of the proposal block.
  623. func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.NodeID) error {
  624. if peerID == "" {
  625. cs.internalMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, ""}
  626. } else {
  627. cs.peerMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, peerID}
  628. }
  629. // TODO: wait for event?!
  630. return nil
  631. }
  632. // SetProposalAndBlock inputs the proposal and all block parts.
  633. func (cs *State) SetProposalAndBlock(
  634. proposal *types.Proposal,
  635. block *types.Block,
  636. parts *types.PartSet,
  637. peerID p2p.NodeID,
  638. ) error {
  639. if err := cs.SetProposal(proposal, peerID); err != nil {
  640. return err
  641. }
  642. for i := 0; i < int(parts.Total()); i++ {
  643. part := parts.GetPart(i)
  644. if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil {
  645. return err
  646. }
  647. }
  648. return nil
  649. }
  650. //------------------------------------------------------------
  651. // internal functions for managing the state
// updateHeight records height in the Height metric and then sets it as the
// current consensus height.
func (cs *State) updateHeight(height int64) {
	cs.metrics.Height.Set(float64(height))
	cs.Height = height
}
  656. func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) {
  657. cs.Round = round
  658. cs.Step = step
  659. }
  660. // enterNewRound(height, 0) at cs.StartTime.
  661. func (cs *State) scheduleRound0(rs *cstypes.RoundState) {
  662. // cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime)
  663. sleepDuration := rs.StartTime.Sub(tmtime.Now())
  664. cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight)
  665. }
  666. // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan)
  667. func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) {
  668. cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step})
  669. }
  670. // send a msg into the receiveRoutine regarding our own proposal, block part, or vote
  671. func (cs *State) sendInternalMessage(mi msgInfo) {
  672. select {
  673. case cs.internalMsgQueue <- mi:
  674. default:
  675. // NOTE: using the go-routine means our votes can
  676. // be processed out of order.
  677. // TODO: use CList here for strict determinism and
  678. // attempt push to internalMsgQueue in receiveRoutine
  679. cs.Logger.Info("Internal msg queue is full. Using a go-routine")
  680. go func() { cs.internalMsgQueue <- mi }()
  681. }
  682. }
  683. // Reconstruct LastCommit from SeenCommit, which we saved along with the block,
  684. // (which happens even before saving the state)
  685. func (cs *State) reconstructLastCommit(state sm.State) {
  686. seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight)
  687. if seenCommit == nil {
  688. panic(fmt.Sprintf("Failed to reconstruct LastCommit: seen commit for height %v not found",
  689. state.LastBlockHeight))
  690. }
  691. lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators)
  692. if !lastPrecommits.HasTwoThirdsMajority() {
  693. panic("Failed to reconstruct LastCommit: Does not have +2/3 maj")
  694. }
  695. cs.LastCommit = lastPrecommits
  696. }
// updateToState updates State and increments height to match that of state.
// The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight.
//
// It first checks that the incoming state is consistent with the current
// height and panics if not. When the incoming state is not ahead of the one
// already held, nothing is reset and only a new-step signal is emitted.
// Otherwise LastCommit is settled, all per-height round-state fields are
// reset, the new chain state is stored and the new round state is broadcast.
func (cs *State) updateToState(state sm.State) {
	// A commit round has been recorded, so the new state must be for exactly
	// the height we were working on.
	if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight {
		panic(fmt.Sprintf("updateToState() expected state height of %v but found %v",
			cs.Height, state.LastBlockHeight))
	}

	if !cs.state.IsEmpty() {
		if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height {
			// This might happen when someone else is mutating cs.state.
			// Someone forgot to pass in state.Copy() somewhere?!
			panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v",
				cs.state.LastBlockHeight+1, cs.Height))
		}
		if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight {
			panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v",
				cs.state.LastBlockHeight, cs.state.InitialHeight))
		}

		// If state isn't further out than cs.state, just ignore.
		// This happens when SwitchToConsensus() is called in the reactor.
		// We don't want to reset e.g. the Votes, but we still want to
		// signal the new round step, because other services (eg. txNotifier)
		// depend on having an up-to-date peer state!
		if state.LastBlockHeight <= cs.state.LastBlockHeight {
			cs.Logger.Info(
				"Ignoring updateToState()",
				"newHeight",
				state.LastBlockHeight+1,
				"oldHeight",
				cs.state.LastBlockHeight+1)
			cs.newStep()
			return
		}
	}

	// Reset fields based on state.
	validators := state.Validators

	// Settle LastCommit. The first matching case wins; when none matches,
	// LastCommit is left as it is.
	switch {
	case state.LastBlockHeight == 0: // Very first commit should be empty.
		cs.LastCommit = (*types.VoteSet)(nil)
	case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes
		if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() {
			panic(fmt.Sprintf("Wanted to form a Commit, but Precommits (H/R: %d/%d) didn't have 2/3+: %v",
				state.LastBlockHeight,
				cs.CommitRound,
				cs.Votes.Precommits(cs.CommitRound)))
		}
		cs.LastCommit = cs.Votes.Precommits(cs.CommitRound)
	case cs.LastCommit == nil:
		// NOTE: when Tendermint starts, it has no votes. reconstructLastCommit
		// must be called to reconstruct LastCommit from SeenCommit.
		panic(fmt.Sprintf("LastCommit cannot be empty after initial block (H:%d)",
			state.LastBlockHeight+1,
		))
	}

	// Next desired block height
	height := state.LastBlockHeight + 1
	if height == 1 {
		// No block committed yet: start at the configured initial height.
		height = state.InitialHeight
	}

	// RoundState fields
	cs.updateHeight(height)
	cs.updateRoundStep(0, cstypes.RoundStepNewHeight)
	if cs.CommitTime.IsZero() {
		// "Now" makes it easier to sync up dev nodes.
		// We add timeoutCommit to allow transactions
		// to be gathered for the first block.
		// And alternative solution that relies on clocks:
		// cs.StartTime = state.LastBlockTime.Add(timeoutCommit)
		cs.StartTime = cs.config.Commit(tmtime.Now())
	} else {
		cs.StartTime = cs.config.Commit(cs.CommitTime)
	}

	cs.Validators = validators
	cs.Proposal = nil
	cs.ProposalBlock = nil
	cs.ProposalBlockParts = nil
	cs.LockedRound = -1
	cs.LockedBlock = nil
	cs.LockedBlockParts = nil
	cs.ValidRound = -1
	cs.ValidBlock = nil
	cs.ValidBlockParts = nil
	cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators)
	cs.CommitRound = -1
	cs.LastValidators = state.LastValidators
	cs.TriggeredTimeoutPrecommit = false

	cs.state = state

	// Finally, broadcast RoundState
	cs.newStep()
}
  787. func (cs *State) newStep() {
  788. rs := cs.RoundStateEvent()
  789. if err := cs.wal.Write(rs); err != nil {
  790. cs.Logger.Error("Error writing to wal", "err", err)
  791. }
  792. cs.nSteps++
  793. // newStep is called by updateToState in NewState before the eventBus is set!
  794. if cs.eventBus != nil {
  795. if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil {
  796. cs.Logger.Error("Error publishing new round step", "err", err)
  797. }
  798. cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState)
  799. }
  800. }
  801. //-----------------------------------------
  802. // the main go routines
// receiveRoutine handles messages which may cause state transitions.
// Its argument (maxSteps) is the number of steps (cs.nSteps) to process before
// exiting - use 0 to run forever.
// It keeps the RoundState and is the only thing that updates it.
// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
// State must be locked before any internal state is updated.
//
// Every input is written to the WAL before it is handled: peer messages and
// timeouts with a plain Write (errors are only logged), internal messages with
// WriteSync (an error panics). A panic anywhere in the loop is recovered,
// logged as a consensus failure, and followed by the same shutdown sequence
// that a Quit signal triggers.
func (cs *State) receiveRoutine(maxSteps int) {
	// onExit stops the WAL, waits for it, and closes cs.done.
	onExit := func(cs *State) {
		// NOTE: the internalMsgQueue may have signed messages from our
		// priv_val that haven't hit the WAL, but its ok because
		// priv_val tracks LastSig

		// close wal now that we're done writing to it
		if err := cs.wal.Stop(); err != nil {
			cs.Logger.Error("error trying to stop wal", "error", err)
		}

		cs.wal.Wait()
		close(cs.done)
	}

	defer func() {
		if r := recover(); r != nil {
			cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
			// stop gracefully
			//
			// NOTE: We most probably shouldn't be running any further when there is
			// some unexpected panic. Some unknown error happened, and so we don't
			// know if that will result in the validator signing an invalid thing. It
			// might be worthwhile to explore a mechanism for manual resuming via
			// some console or secure RPC system, but for now, halting the chain upon
			// unexpected consensus bugs sounds like the better option.
			onExit(cs)
		}
	}()

	for {
		// With a positive maxSteps, leave the loop once enough steps have been
		// taken. This path resets the counter and returns without calling onExit.
		if maxSteps > 0 {
			if cs.nSteps >= maxSteps {
				cs.Logger.Info("reached max steps. exiting receive routine")
				cs.nSteps = 0
				return
			}
		}

		// Copy of the round state taken before blocking in select; handleTimeout
		// compares incoming timeouts against this copy.
		rs := cs.RoundState
		var mi msgInfo

		select {
		case <-cs.txNotifier.TxsAvailable():
			cs.handleTxsAvailable()

		case mi = <-cs.peerMsgQueue:
			if err := cs.wal.Write(mi); err != nil {
				cs.Logger.Error("Error writing to wal", "err", err)
			}

			// handles proposals, block parts, votes
			// may generate internal events (votes, complete proposals, 2/3 majorities)
			cs.handleMsg(mi)

		case mi = <-cs.internalMsgQueue:
			err := cs.wal.WriteSync(mi) // NOTE: fsync
			if err != nil {
				panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", mi, err))
			}

			if _, ok := mi.Msg.(*VoteMessage); ok {
				// we actually want to simulate failing during
				// the previous WriteSync, but this isn't easy to do.
				// Equivalent would be to fail here and manually remove
				// some bytes from the end of the wal.
				fail.Fail() // XXX
			}

			// handles proposals, block parts, votes
			cs.handleMsg(mi)

		case ti := <-cs.timeoutTicker.Chan(): // tockChan:
			if err := cs.wal.Write(ti); err != nil {
				cs.Logger.Error("Error writing to wal", "err", err)
			}

			// if the timeout is relevant to the rs
			// go to the next step
			cs.handleTimeout(ti, rs)

		case <-cs.Quit():
			onExit(cs)
			return
		}
	}
}
  881. func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) {
  882. cs.Logger.Debug("Received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step)
  883. // timeouts must be for current height, round, step
  884. if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) {
  885. cs.Logger.Debug("Ignoring tock because we're ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step)
  886. return
  887. }
  888. // the timeout will now cause a state transition
  889. cs.mtx.Lock()
  890. defer cs.mtx.Unlock()
  891. switch ti.Step {
  892. case cstypes.RoundStepNewHeight:
  893. // NewRound event fired from enterNewRound.
  894. // XXX: should we fire timeout here (for timeout commit)?
  895. cs.enterNewRound(ti.Height, 0)
  896. case cstypes.RoundStepNewRound:
  897. cs.enterPropose(ti.Height, 0)
  898. case cstypes.RoundStepPropose:
  899. if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil {
  900. cs.Logger.Error("Error publishing timeout propose", "err", err)
  901. }
  902. cs.enterPrevote(ti.Height, ti.Round)
  903. case cstypes.RoundStepPrevoteWait:
  904. if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
  905. cs.Logger.Error("Error publishing timeout wait", "err", err)
  906. }
  907. cs.enterPrecommit(ti.Height, ti.Round)
  908. case cstypes.RoundStepPrecommitWait:
  909. if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
  910. cs.Logger.Error("Error publishing timeout wait", "err", err)
  911. }
  912. cs.enterPrecommit(ti.Height, ti.Round)
  913. cs.enterNewRound(ti.Height, ti.Round+1)
  914. default:
  915. panic(fmt.Sprintf("Invalid timeout step: %v", ti.Step))
  916. }
  917. }
  918. func (cs *State) handleTxsAvailable() {
  919. cs.mtx.Lock()
  920. defer cs.mtx.Unlock()
  921. // We only need to do this for round 0.
  922. if cs.Round != 0 {
  923. return
  924. }
  925. switch cs.Step {
  926. case cstypes.RoundStepNewHeight: // timeoutCommit phase
  927. if cs.needProofBlock(cs.Height) {
  928. // enterPropose will be called by enterNewRound
  929. return
  930. }
  931. // +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight
  932. timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond
  933. cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound)
  934. case cstypes.RoundStepNewRound: // after timeoutCommit
  935. cs.enterPropose(cs.Height, 0)
  936. }
  937. }
  938. //-----------------------------------------------------------------------------
  939. // State functions
  940. // Used internally by handleTimeout and handleMsg to make state transitions
  941. // Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit),
  942. // or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1)
  943. // Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1)
  944. // Enter: +2/3 precommits for nil at (height,round-1)
  945. // Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round)
  946. // NOTE: cs.StartTime was already set for height.
  947. func (cs *State) enterNewRound(height int64, round int32) {
  948. logger := cs.Logger.With("height", height, "round", round)
  949. if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) {
  950. logger.Debug(fmt.Sprintf(
  951. "enterNewRound(%v/%v): Invalid args. Current step: %v/%v/%v",
  952. height,
  953. round,
  954. cs.Height,
  955. cs.Round,
  956. cs.Step))
  957. return
  958. }
  959. if now := tmtime.Now(); cs.StartTime.After(now) {
  960. logger.Info("Need to set a buffer and log message here for sanity.", "startTime", cs.StartTime, "now", now)
  961. }
  962. logger.Info(fmt.Sprintf("enterNewRound(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  963. // Increment validators if necessary
  964. validators := cs.Validators
  965. if cs.Round < round {
  966. validators = validators.Copy()
  967. validators.IncrementProposerPriority(tmmath.SafeSubInt32(round, cs.Round))
  968. }
  969. // Setup new round
  970. // we don't fire newStep for this step,
  971. // but we fire an event, so update the round step first
  972. cs.updateRoundStep(round, cstypes.RoundStepNewRound)
  973. cs.Validators = validators
  974. if round == 0 {
  975. // We've already reset these upon new height,
  976. // and meanwhile we might have received a proposal
  977. // for round 0.
  978. } else {
  979. logger.Info("Resetting Proposal info")
  980. cs.Proposal = nil
  981. cs.ProposalBlock = nil
  982. cs.ProposalBlockParts = nil
  983. }
  984. cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping
  985. cs.TriggeredTimeoutPrecommit = false
  986. if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil {
  987. cs.Logger.Error("Error publishing new round", "err", err)
  988. }
  989. cs.metrics.Rounds.Set(float64(round))
  990. // Wait for txs to be available in the mempool
  991. // before we enterPropose in round 0. If the last block changed the app hash,
  992. // we may need an empty "proof" block, and enterPropose immediately.
  993. waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height)
  994. if waitForTxs {
  995. if cs.config.CreateEmptyBlocksInterval > 0 {
  996. cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round,
  997. cstypes.RoundStepNewRound)
  998. }
  999. } else {
  1000. cs.enterPropose(height, round)
  1001. }
  1002. }
  1003. // needProofBlock returns true on the first height (so the genesis app hash is signed right away)
  1004. // and where the last block (height-1) caused the app hash to change
  1005. func (cs *State) needProofBlock(height int64) bool {
  1006. if height == cs.state.InitialHeight {
  1007. return true
  1008. }
  1009. lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1)
  1010. if lastBlockMeta == nil {
  1011. panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1))
  1012. }
  1013. return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash)
  1014. }
  1015. func (cs *State) isProposer(address []byte) bool {
  1016. return bytes.Equal(cs.Validators.GetProposer().Address, address)
  1017. }
  1018. func (cs *State) defaultDecideProposal(height int64, round int32) {
  1019. var block *types.Block
  1020. var blockParts *types.PartSet
  1021. // Decide on block
  1022. if cs.ValidBlock != nil {
  1023. // If there is valid block, choose that.
  1024. block, blockParts = cs.ValidBlock, cs.ValidBlockParts
  1025. } else {
  1026. // Create a new proposal block from state/txs from the mempool.
  1027. block, blockParts = cs.createProposalBlock()
  1028. if block == nil {
  1029. return
  1030. }
  1031. }
  1032. // Flush the WAL. Otherwise, we may not recompute the same proposal to sign,
  1033. // and the privValidator will refuse to sign anything.
  1034. if err := cs.wal.FlushAndSync(); err != nil {
  1035. cs.Logger.Error("Error flushing to disk")
  1036. }
  1037. // Make proposal
  1038. propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()}
  1039. proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID)
  1040. p := proposal.ToProto()
  1041. if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil {
  1042. proposal.Signature = p.Signature
  1043. // send proposal and block parts on internal msg queue
  1044. cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""})
  1045. for i := 0; i < int(blockParts.Total()); i++ {
  1046. part := blockParts.GetPart(i)
  1047. cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""})
  1048. }
  1049. cs.Logger.Info("Signed proposal", "height", height, "round", round, "proposal", proposal)
  1050. cs.Logger.Debug(fmt.Sprintf("Signed proposal block: %v", block))
  1051. } else if !cs.replayMode {
  1052. cs.Logger.Error("enterPropose: Error signing proposal", "height", height, "round", round, "err", err)
  1053. }
  1054. }
  1055. // Returns true if the proposal block is complete &&
  1056. // (if POLRound was proposed, we have +2/3 prevotes from there).
  1057. func (cs *State) isProposalComplete() bool {
  1058. if cs.Proposal == nil || cs.ProposalBlock == nil {
  1059. return false
  1060. }
  1061. // we have the proposal. if there's a POLRound,
  1062. // make sure we have the prevotes from it too
  1063. if cs.Proposal.POLRound < 0 {
  1064. return true
  1065. }
  1066. // if this is false the proposer is lying or we haven't received the POL yet
  1067. return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority()
  1068. }
  1069. // Create the next block to propose and return it. Returns nil block upon error.
  1070. //
  1071. // We really only need to return the parts, but the block is returned for
  1072. // convenience so we can log the proposal block.
  1073. //
  1074. // NOTE: keep it side-effect free for clarity.
  1075. // CONTRACT: cs.privValidator is not nil.
  1076. func (cs *State) createProposalBlock() (block *types.Block, blockParts *types.PartSet) {
  1077. if cs.privValidator == nil {
  1078. panic("entered createProposalBlock with privValidator being nil")
  1079. }
  1080. var commit *types.Commit
  1081. switch {
  1082. case cs.Height == cs.state.InitialHeight:
  1083. // We're creating a proposal for the first block.
  1084. // The commit is empty, but not nil.
  1085. commit = types.NewCommit(0, 0, types.BlockID{}, nil)
  1086. case cs.LastCommit.HasTwoThirdsMajority():
  1087. // Make the commit from LastCommit
  1088. commit = cs.LastCommit.MakeCommit()
  1089. default: // This shouldn't happen.
  1090. cs.Logger.Error("enterPropose: Cannot propose anything: No commit for the previous block")
  1091. return
  1092. }
  1093. if cs.privValidatorPubKey == nil {
  1094. // If this node is a validator & proposer in the current round, it will
  1095. // miss the opportunity to create a block.
  1096. cs.Logger.Error(fmt.Sprintf("enterPropose: %v", errPubKeyIsNotSet))
  1097. return
  1098. }
  1099. proposerAddr := cs.privValidatorPubKey.Address()
  1100. return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr)
  1101. }
  1102. // Enter: any +2/3 prevotes at next round.
  1103. func (cs *State) enterPrevoteWait(height int64, round int32) {
  1104. logger := cs.Logger.With("height", height, "round", round)
  1105. if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) {
  1106. logger.Debug(fmt.Sprintf(
  1107. "enterPrevoteWait(%v/%v): Invalid args. Current step: %v/%v/%v",
  1108. height,
  1109. round,
  1110. cs.Height,
  1111. cs.Round,
  1112. cs.Step))
  1113. return
  1114. }
  1115. if !cs.Votes.Prevotes(round).HasTwoThirdsAny() {
  1116. panic(fmt.Sprintf("enterPrevoteWait(%v/%v), but Prevotes does not have any +2/3 votes", height, round))
  1117. }
  1118. logger.Info(fmt.Sprintf("enterPrevoteWait(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  1119. defer func() {
  1120. // Done enterPrevoteWait:
  1121. cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait)
  1122. cs.newStep()
  1123. }()
  1124. // Wait for some more prevotes; enterPrecommit
  1125. cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait)
  1126. }
  1127. // Enter: any +2/3 precommits for next round.
  1128. func (cs *State) enterPrecommitWait(height int64, round int32) {
  1129. logger := cs.Logger.With("height", height, "round", round)
  1130. if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) {
  1131. logger.Debug(
  1132. fmt.Sprintf(
  1133. "enterPrecommitWait(%v/%v): Invalid args. "+
  1134. "Current state is Height/Round: %v/%v/, TriggeredTimeoutPrecommit:%v",
  1135. height, round, cs.Height, cs.Round, cs.TriggeredTimeoutPrecommit))
  1136. return
  1137. }
  1138. if !cs.Votes.Precommits(round).HasTwoThirdsAny() {
  1139. panic(fmt.Sprintf("enterPrecommitWait(%v/%v), but Precommits does not have any +2/3 votes", height, round))
  1140. }
  1141. logger.Info(fmt.Sprintf("enterPrecommitWait(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  1142. defer func() {
  1143. // Done enterPrecommitWait:
  1144. cs.TriggeredTimeoutPrecommit = true
  1145. cs.newStep()
  1146. }()
  1147. // Wait for some more precommits; enterNewRound
  1148. cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait)
  1149. }
  1150. // Enter: +2/3 precommits for block
  1151. func (cs *State) enterCommit(height int64, commitRound int32) {
  1152. logger := cs.Logger.With("height", height, "commitRound", commitRound)
  1153. if cs.Height != height || cstypes.RoundStepCommit <= cs.Step {
  1154. logger.Debug(fmt.Sprintf(
  1155. "enterCommit(%v/%v): Invalid args. Current step: %v/%v/%v",
  1156. height,
  1157. commitRound,
  1158. cs.Height,
  1159. cs.Round,
  1160. cs.Step))
  1161. return
  1162. }
  1163. logger.Info(fmt.Sprintf("enterCommit(%v/%v). Current: %v/%v/%v", height, commitRound, cs.Height, cs.Round, cs.Step))
  1164. defer func() {
  1165. // Done enterCommit:
  1166. // keep cs.Round the same, commitRound points to the right Precommits set.
  1167. cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit)
  1168. cs.CommitRound = commitRound
  1169. cs.CommitTime = tmtime.Now()
  1170. cs.newStep()
  1171. // Maybe finalize immediately.
  1172. cs.tryFinalizeCommit(height)
  1173. }()
  1174. blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority()
  1175. if !ok {
  1176. panic("RunActionCommit() expects +2/3 precommits")
  1177. }
  1178. // The Locked* fields no longer matter.
  1179. // Move them over to ProposalBlock if they match the commit hash,
  1180. // otherwise they'll be cleared in updateToState.
  1181. if cs.LockedBlock.HashesTo(blockID.Hash) {
  1182. logger.Info("Commit is for locked block. Set ProposalBlock=LockedBlock", "blockHash", blockID.Hash)
  1183. cs.ProposalBlock = cs.LockedBlock
  1184. cs.ProposalBlockParts = cs.LockedBlockParts
  1185. }
  1186. // If we don't have the block being committed, set up to get it.
  1187. if !cs.ProposalBlock.HashesTo(blockID.Hash) {
  1188. if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) {
  1189. logger.Info(
  1190. "Commit is for a block we don't know about. Set ProposalBlock=nil",
  1191. "proposal",
  1192. cs.ProposalBlock.Hash(),
  1193. "commit",
  1194. blockID.Hash)
  1195. // We're getting the wrong block.
  1196. // Set up ProposalBlockParts and keep waiting.
  1197. cs.ProposalBlock = nil
  1198. cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader)
  1199. if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil {
  1200. cs.Logger.Error("Error publishing valid block", "err", err)
  1201. }
  1202. cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState)
  1203. }
  1204. // else {
  1205. // We just need to keep waiting.
  1206. // }
  1207. }
  1208. }
  1209. // If we have the block AND +2/3 commits for it, finalize.
  1210. func (cs *State) tryFinalizeCommit(height int64) {
  1211. logger := cs.Logger.With("height", height)
  1212. if cs.Height != height {
  1213. panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height))
  1214. }
  1215. blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority()
  1216. if !ok || len(blockID.Hash) == 0 {
  1217. logger.Error("Attempt to finalize failed. There was no +2/3 majority, or +2/3 was for <nil>.")
  1218. return
  1219. }
  1220. if !cs.ProposalBlock.HashesTo(blockID.Hash) {
  1221. // TODO: this happens every time if we're not a validator (ugly logs)
  1222. // TODO: ^^ wait, why does it matter that we're a validator?
  1223. logger.Info(
  1224. "Attempt to finalize failed. We don't have the commit block.",
  1225. "proposal-block",
  1226. cs.ProposalBlock.Hash(),
  1227. "commit-block",
  1228. blockID.Hash)
  1229. return
  1230. }
  1231. // go
  1232. cs.finalizeCommit(height)
  1233. }
// finalizeCommit commits the proposal block for height and moves on to the
// next height at step cstypes.RoundStepNewHeight.
//
// It is a no-op (debug log only) unless height is the current height and the
// step is RoundStepCommit. After sanity checks that panic on violation, it
// proceeds in this order: save the block to the block store (unless already
// stored), fsync an EndHeightMessage to the WAL, apply the block through the
// block executor, prune if a retain height was returned, record metrics,
// switch to the new state, refresh the validator public key, and schedule
// round 0. The fail.Fail() calls between the stages are marked XXX in the
// original; presumably they are crash-injection points for recovery testing -
// TODO confirm against libs/fail.
//
// If ApplyBlock returns an error it is logged and the function returns
// without updating the state.
func (cs *State) finalizeCommit(height int64) {
	if cs.Height != height || cs.Step != cstypes.RoundStepCommit {
		cs.Logger.Debug(fmt.Sprintf(
			"finalizeCommit(%v): Invalid args. Current step: %v/%v/%v",
			height,
			cs.Height,
			cs.Round,
			cs.Step))
		return
	}

	blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority()
	block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts

	// Sanity checks: the commit must have a +2/3 majority and the proposal
	// block/parts must be exactly the block that was committed.
	if !ok {
		panic("Cannot finalizeCommit, commit does not have two thirds majority")
	}
	if !blockParts.HasHeader(blockID.PartSetHeader) {
		panic("Expected ProposalBlockParts header to be commit header")
	}
	if !block.HashesTo(blockID.Hash) {
		panic("Cannot finalizeCommit, ProposalBlock does not hash to commit hash")
	}
	if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil {
		panic(fmt.Errorf("+2/3 committed an invalid block: %w", err))
	}

	cs.Logger.Info("Finalizing commit of block with N txs",
		"height", block.Height,
		"hash", block.Hash(),
		"root", block.AppHash,
		"N", len(block.Txs))
	cs.Logger.Info(fmt.Sprintf("%v", block))

	fail.Fail() // XXX

	// Save to blockStore.
	if cs.blockStore.Height() < block.Height {
		// NOTE: the seenCommit is local justification to commit this block,
		// but may differ from the LastCommit included in the next block
		precommits := cs.Votes.Precommits(cs.CommitRound)
		seenCommit := precommits.MakeCommit()
		cs.blockStore.SaveBlock(block, blockParts, seenCommit)
	} else {
		// Happens during replay if we already saved the block but didn't commit
		cs.Logger.Info("Calling finalizeCommit on already stored block", "height", block.Height)
	}

	fail.Fail() // XXX

	// Write EndHeightMessage{} for this height, implying that the blockstore
	// has saved the block.
	//
	// If we crash before writing this EndHeightMessage{}, we will recover by
	// running ApplyBlock during the ABCI handshake when we restart. If we
	// didn't save the block to the blockstore before writing
	// EndHeightMessage{}, we'd have to change WAL replay -- currently it
	// complains about replaying for heights where an #ENDHEIGHT entry already
	// exists.
	//
	// Either way, the State should not be resumed until we
	// successfully call ApplyBlock (ie. later here, or in Handshake after
	// restart).
	endMsg := EndHeightMessage{height}
	if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync
		panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node",
			endMsg, err))
	}

	fail.Fail() // XXX

	// Create a copy of the state for staging and an event cache for txs.
	stateCopy := cs.state.Copy()

	// Execute and commit the block, update and save the state, and update the mempool.
	// NOTE The block.AppHash won't reflect these txs until the next block.
	var err error
	var retainHeight int64
	stateCopy, retainHeight, err = cs.blockExec.ApplyBlock(
		stateCopy,
		types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()},
		block)
	if err != nil {
		cs.Logger.Error("Error on ApplyBlock", "err", err)
		return
	}

	fail.Fail() // XXX

	// Prune old heights, if requested by ABCI app.
	// A pruning failure is logged and does not stop finalization.
	if retainHeight > 0 {
		pruned, err := cs.pruneBlocks(retainHeight)
		if err != nil {
			cs.Logger.Error("Failed to prune blocks", "retainHeight", retainHeight, "err", err)
		} else {
			cs.Logger.Info("Pruned blocks", "pruned", pruned, "retainHeight", retainHeight)
		}
	}

	// must be called before we update state
	cs.recordMetrics(height, block)

	// NewHeightStep!
	cs.updateToState(stateCopy)

	fail.Fail() // XXX

	// Private validator might have changed its key pair => refetch pubkey.
	if err := cs.updatePrivValidatorPubKey(); err != nil {
		cs.Logger.Error("Can't get private validator pubkey", "err", err)
	}

	// cs.StartTime is already set.
	// Schedule Round0 to start soon.
	cs.scheduleRound0(&cs.RoundState)

	// By here,
	// * cs.Height has been incremented to height+1
	// * cs.Step is now cstypes.RoundStepNewHeight
	// * cs.StartTime is set to when we will start round0.
}
  1338. func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) {
  1339. base := cs.blockStore.Base()
  1340. if retainHeight <= base {
  1341. return 0, nil
  1342. }
  1343. pruned, err := cs.blockStore.PruneBlocks(retainHeight)
  1344. if err != nil {
  1345. return 0, fmt.Errorf("failed to prune block store: %w", err)
  1346. }
  1347. err = cs.blockExec.Store().PruneStates(base, retainHeight)
  1348. if err != nil {
  1349. return 0, fmt.Errorf("failed to prune state database: %w", err)
  1350. }
  1351. return pruned, nil
  1352. }
  1353. func (cs *State) recordMetrics(height int64, block *types.Block) {
  1354. cs.metrics.Validators.Set(float64(cs.Validators.Size()))
  1355. cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower()))
  1356. var (
  1357. missingValidators int
  1358. missingValidatorsPower int64
  1359. )
  1360. // height=0 -> MissingValidators and MissingValidatorsPower are both 0.
  1361. // Remember that the first LastCommit is intentionally empty, so it's not
  1362. // fair to increment missing validators number.
  1363. if height > cs.state.InitialHeight {
  1364. // Sanity check that commit size matches validator set size - only applies
  1365. // after first block.
  1366. var (
  1367. commitSize = block.LastCommit.Size()
  1368. valSetLen = len(cs.LastValidators.Validators)
  1369. address types.Address
  1370. )
  1371. if commitSize != valSetLen {
  1372. panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v",
  1373. commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators))
  1374. }
  1375. if cs.privValidator != nil {
  1376. if cs.privValidatorPubKey == nil {
  1377. // Metrics won't be updated, but it's not critical.
  1378. cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet))
  1379. } else {
  1380. address = cs.privValidatorPubKey.Address()
  1381. }
  1382. }
  1383. for i, val := range cs.LastValidators.Validators {
  1384. commitSig := block.LastCommit.Signatures[i]
  1385. if commitSig.Absent() {
  1386. missingValidators++
  1387. missingValidatorsPower += val.VotingPower
  1388. }
  1389. if bytes.Equal(val.Address, address) {
  1390. label := []string{
  1391. "validator_address", val.Address.String(),
  1392. }
  1393. cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower))
  1394. if commitSig.ForBlock() {
  1395. cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height))
  1396. } else {
  1397. cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1))
  1398. }
  1399. }
  1400. }
  1401. }
  1402. cs.metrics.MissingValidators.Set(float64(missingValidators))
  1403. cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower))
  1404. // NOTE: byzantine validators power and count is only for consensus evidence i.e. duplicate vote
  1405. var (
  1406. byzantineValidatorsPower = int64(0)
  1407. byzantineValidatorsCount = int64(0)
  1408. )
  1409. for _, ev := range block.Evidence.Evidence {
  1410. if dve, ok := ev.(*types.DuplicateVoteEvidence); ok {
  1411. if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil {
  1412. byzantineValidatorsCount++
  1413. byzantineValidatorsPower += val.VotingPower
  1414. }
  1415. }
  1416. }
  1417. cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount))
  1418. cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower))
  1419. if height > 1 {
  1420. lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1)
  1421. if lastBlockMeta != nil {
  1422. cs.metrics.BlockIntervalSeconds.Observe(
  1423. block.Time.Sub(lastBlockMeta.Header.Time).Seconds(),
  1424. )
  1425. }
  1426. }
  1427. cs.metrics.NumTxs.Set(float64(len(block.Data.Txs)))
  1428. cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs)))
  1429. cs.metrics.BlockSizeBytes.Set(float64(block.Size()))
  1430. cs.metrics.CommittedHeight.Set(float64(block.Height))
  1431. }
  1432. //-----------------------------------------------------------------------------
  1433. // NOTE: block is not necessarily valid.
  1434. // Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit,
  1435. // once we have the full block.
  1436. func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.NodeID) (added bool, err error) {
  1437. height, round, part := msg.Height, msg.Round, msg.Part
  1438. // Blocks might be reused, so round mismatch is OK
  1439. if cs.Height != height {
  1440. cs.Logger.Debug("Received block part from wrong height", "height", height, "round", round)
  1441. return false, nil
  1442. }
  1443. // We're not expecting a block part.
  1444. if cs.ProposalBlockParts == nil {
  1445. // NOTE: this can happen when we've gone to a higher round and
  1446. // then receive parts from the previous round - not necessarily a bad peer.
  1447. cs.Logger.Info("Received a block part when we're not expecting any",
  1448. "height", height, "round", round, "index", part.Index, "peer", peerID)
  1449. return false, nil
  1450. }
  1451. added, err = cs.ProposalBlockParts.AddPart(part)
  1452. if err != nil {
  1453. return added, err
  1454. }
  1455. if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes {
  1456. return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)",
  1457. cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes,
  1458. )
  1459. }
  1460. if added && cs.ProposalBlockParts.IsComplete() {
  1461. bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader())
  1462. if err != nil {
  1463. return added, err
  1464. }
  1465. var pbb = new(tmproto.Block)
  1466. err = proto.Unmarshal(bz, pbb)
  1467. if err != nil {
  1468. return added, err
  1469. }
  1470. block, err := types.BlockFromProto(pbb)
  1471. if err != nil {
  1472. return added, err
  1473. }
  1474. cs.ProposalBlock = block
  1475. // NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal
  1476. cs.Logger.Info("Received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash())
  1477. if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil {
  1478. cs.Logger.Error("Error publishing event complete proposal", "err", err)
  1479. }
  1480. // Update Valid* if we can.
  1481. prevotes := cs.Votes.Prevotes(cs.Round)
  1482. blockID, hasTwoThirds := prevotes.TwoThirdsMajority()
  1483. if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) {
  1484. if cs.ProposalBlock.HashesTo(blockID.Hash) {
  1485. cs.Logger.Info("Updating valid block to new proposal block",
  1486. "valid-round", cs.Round, "valid-block-hash", cs.ProposalBlock.Hash())
  1487. cs.ValidRound = cs.Round
  1488. cs.ValidBlock = cs.ProposalBlock
  1489. cs.ValidBlockParts = cs.ProposalBlockParts
  1490. }
  1491. // TODO: In case there is +2/3 majority in Prevotes set for some
  1492. // block and cs.ProposalBlock contains different block, either
  1493. // proposer is faulty or voting power of faulty processes is more
  1494. // than 1/3. We should trigger in the future accountability
  1495. // procedure at this point.
  1496. }
  1497. if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() {
  1498. // Move onto the next step
  1499. cs.enterPrevote(height, cs.Round)
  1500. if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added
  1501. cs.enterPrecommit(height, cs.Round)
  1502. }
  1503. } else if cs.Step == cstypes.RoundStepCommit {
  1504. // If we're waiting on the proposal block...
  1505. cs.tryFinalizeCommit(height)
  1506. }
  1507. return added, nil
  1508. }
  1509. return added, nil
  1510. }
  1511. // Attempt to add the vote. if its a duplicate signature, dupeout the validator
  1512. func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.NodeID) (bool, error) {
  1513. added, err := cs.addVote(vote, peerID)
  1514. if err != nil {
  1515. // If the vote height is off, we'll just ignore it,
  1516. // But if it's a conflicting sig, add it to the cs.evpool.
  1517. // If it's otherwise invalid, punish peer.
  1518. // nolint: gocritic
  1519. if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok {
  1520. if cs.privValidatorPubKey == nil {
  1521. return false, errPubKeyIsNotSet
  1522. }
  1523. if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) {
  1524. cs.Logger.Error(
  1525. "Found conflicting vote from ourselves. Did you unsafe_reset a validator?",
  1526. "height",
  1527. vote.Height,
  1528. "round",
  1529. vote.Round,
  1530. "type",
  1531. vote.Type)
  1532. return added, err
  1533. }
  1534. var timestamp time.Time
  1535. if voteErr.VoteA.Height == cs.state.InitialHeight {
  1536. timestamp = cs.state.LastBlockTime // genesis time
  1537. } else {
  1538. timestamp = sm.MedianTime(cs.LastCommit.MakeCommit(), cs.LastValidators)
  1539. }
  1540. ev := types.NewDuplicateVoteEvidence(voteErr.VoteA, voteErr.VoteB, timestamp, cs.Validators)
  1541. evidenceErr := cs.evpool.AddEvidenceFromConsensus(ev)
  1542. if evidenceErr != nil {
  1543. cs.Logger.Error("Failed to add evidence to the evidence pool", "err", evidenceErr)
  1544. }
  1545. return added, err
  1546. } else if err == types.ErrVoteNonDeterministicSignature {
  1547. cs.Logger.Debug("Vote has non-deterministic signature", "err", err)
  1548. } else {
  1549. // Either
  1550. // 1) bad peer OR
  1551. // 2) not a bad peer? this can also err sometimes with "Unexpected step" OR
  1552. // 3) tmkms use with multiple validators connecting to a single tmkms instance
  1553. // (https://github.com/tendermint/tendermint/issues/3839).
  1554. cs.Logger.Info("Error attempting to add vote", "err", err)
  1555. return added, ErrAddingVote
  1556. }
  1557. }
  1558. return added, nil
  1559. }
  1560. //-----------------------------------------------------------------------------
  1561. // CONTRACT: cs.privValidator is not nil.
  1562. func (cs *State) signVote(
  1563. msgType tmproto.SignedMsgType,
  1564. hash []byte,
  1565. header types.PartSetHeader,
  1566. ) (*types.Vote, error) {
  1567. // Flush the WAL. Otherwise, we may not recompute the same vote to sign,
  1568. // and the privValidator will refuse to sign anything.
  1569. if err := cs.wal.FlushAndSync(); err != nil {
  1570. return nil, err
  1571. }
  1572. if cs.privValidatorPubKey == nil {
  1573. return nil, errPubKeyIsNotSet
  1574. }
  1575. addr := cs.privValidatorPubKey.Address()
  1576. valIdx, _ := cs.Validators.GetByAddress(addr)
  1577. vote := &types.Vote{
  1578. ValidatorAddress: addr,
  1579. ValidatorIndex: valIdx,
  1580. Height: cs.Height,
  1581. Round: cs.Round,
  1582. Timestamp: cs.voteTime(),
  1583. Type: msgType,
  1584. BlockID: types.BlockID{Hash: hash, PartSetHeader: header},
  1585. }
  1586. v := vote.ToProto()
  1587. err := cs.privValidator.SignVote(cs.state.ChainID, v)
  1588. vote.Signature = v.Signature
  1589. return vote, err
  1590. }
  1591. func (cs *State) voteTime() time.Time {
  1592. now := tmtime.Now()
  1593. minVoteTime := now
  1594. // TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil,
  1595. // even if cs.LockedBlock != nil. See https://docs.tendermint.com/master/spec/.
  1596. timeIota := time.Duration(cs.state.ConsensusParams.Block.TimeIotaMs) * time.Millisecond
  1597. if cs.LockedBlock != nil {
  1598. // See the BFT time spec https://docs.tendermint.com/master/spec/consensus/bft-time.html
  1599. minVoteTime = cs.LockedBlock.Time.Add(timeIota)
  1600. } else if cs.ProposalBlock != nil {
  1601. minVoteTime = cs.ProposalBlock.Time.Add(timeIota)
  1602. }
  1603. if now.After(minVoteTime) {
  1604. return now
  1605. }
  1606. return minVoteTime
  1607. }
  1608. // sign the vote and publish on internalMsgQueue
  1609. func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote {
  1610. if cs.privValidator == nil { // the node does not have a key
  1611. return nil
  1612. }
  1613. if cs.privValidatorPubKey == nil {
  1614. // Vote won't be signed, but it's not critical.
  1615. cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet))
  1616. return nil
  1617. }
  1618. // If the node not in the validator set, do nothing.
  1619. if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) {
  1620. return nil
  1621. }
  1622. // TODO: pass pubKey to signVote
  1623. vote, err := cs.signVote(msgType, hash, header)
  1624. if err == nil {
  1625. cs.sendInternalMessage(msgInfo{&VoteMessage{vote}, ""})
  1626. cs.Logger.Info("Signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote)
  1627. return vote
  1628. }
  1629. // if !cs.replayMode {
  1630. cs.Logger.Error("Error signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err)
  1631. //}
  1632. return nil
  1633. }
  1634. // updatePrivValidatorPubKey get's the private validator public key and
  1635. // memoizes it. This func returns an error if the private validator is not
  1636. // responding or responds with an error.
  1637. func (cs *State) updatePrivValidatorPubKey() error {
  1638. if cs.privValidator == nil {
  1639. return nil
  1640. }
  1641. pubKey, err := cs.privValidator.GetPubKey()
  1642. if err != nil {
  1643. return err
  1644. }
  1645. cs.privValidatorPubKey = pubKey
  1646. return nil
  1647. }
  1648. // look back to check existence of the node's consensus votes before joining consensus
  1649. func (cs *State) checkDoubleSigningRisk(height int64) error {
  1650. if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 {
  1651. valAddr := cs.privValidatorPubKey.Address()
  1652. doubleSignCheckHeight := cs.config.DoubleSignCheckHeight
  1653. if doubleSignCheckHeight > height {
  1654. doubleSignCheckHeight = height
  1655. }
  1656. for i := int64(1); i < doubleSignCheckHeight; i++ {
  1657. lastCommit := cs.blockStore.LoadSeenCommit(height - i)
  1658. if lastCommit != nil {
  1659. for sigIdx, s := range lastCommit.Signatures {
  1660. if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) {
  1661. cs.Logger.Info("Found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i)
  1662. return ErrSignatureFoundInPastBlocks
  1663. }
  1664. }
  1665. }
  1666. }
  1667. }
  1668. return nil
  1669. }
  1670. //---------------------------------------------------------
  1671. func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int {
  1672. if h1 < h2 {
  1673. return -1
  1674. } else if h1 > h2 {
  1675. return 1
  1676. }
  1677. if r1 < r2 {
  1678. return -1
  1679. } else if r1 > r2 {
  1680. return 1
  1681. }
  1682. if s1 < s2 {
  1683. return -1
  1684. } else if s1 > s2 {
  1685. return 1
  1686. }
  1687. return 0
  1688. }
  1689. // repairWalFile decodes messages from src (until the decoder errors) and
  1690. // writes them to dst.
  1691. func repairWalFile(src, dst string) error {
  1692. in, err := os.Open(src)
  1693. if err != nil {
  1694. return err
  1695. }
  1696. defer in.Close()
  1697. out, err := os.Open(dst)
  1698. if err != nil {
  1699. return err
  1700. }
  1701. defer out.Close()
  1702. var (
  1703. dec = NewWALDecoder(in)
  1704. enc = NewWALEncoder(out)
  1705. )
  1706. // best-case repair (until first error is encountered)
  1707. for {
  1708. msg, err := dec.Decode()
  1709. if err != nil {
  1710. break
  1711. }
  1712. err = enc.Encode(msg)
  1713. if err != nil {
  1714. return fmt.Errorf("failed to encode msg: %w", err)
  1715. }
  1716. }
  1717. return nil
  1718. }