From e1a3f16fa48d9f5ef0a245d2949e6dca4fd22578 Mon Sep 17 00:00:00 2001 From: Jae Kwon Date: Tue, 15 May 2018 08:48:27 -0700 Subject: [PATCH] Comment tweaks --- consensus/replay.go | 60 +++++++++++++++++++++++++-------------------- consensus/state.go | 25 +++++++++++-------- 2 files changed, 49 insertions(+), 36 deletions(-) diff --git a/consensus/replay.go b/consensus/replay.go index 5b5a48425..265ab5388 100644 --- a/consensus/replay.go +++ b/consensus/replay.go @@ -26,20 +26,24 @@ import ( var crc32c = crc32.MakeTable(crc32.Castagnoli) // Functionality to replay blocks and messages on recovery from a crash. -// There are two general failure scenarios: failure during consensus, and failure while applying the block. -// The former is handled by the WAL, the latter by the proxyApp Handshake on restart, -// which ultimately hands off the work to the WAL. +// There are two general failure scenarios: +// +// 1. failure during consensus +// 2. failure while applying the block +// +// The former is handled by the WAL, the latter by the proxyApp Handshake on +// restart, which ultimately hands off the work to the WAL. //----------------------------------------- -// recover from failure during consensus -// by replaying messages from the WAL +// 1. Recover from failure during consensus +// (by replaying messages from the WAL) +//----------------------------------------- -// Unmarshal and apply a single message to the consensus state -// as if it were received in receiveRoutine -// Lines that start with "#" are ignored. -// NOTE: receiveRoutine should not be running +// Unmarshal and apply a single message to the consensus state as if it were +// received in receiveRoutine. Lines that start with "#" are ignored. +// NOTE: receiveRoutine should not be running. func (cs *ConsensusState) readReplayMessage(msg *TimedWALMessage, newStepCh chan interface{}) error { - // skip meta messages + // Skip meta messages which exist for demarcating boundaries. if _, ok := msg.Msg.(EndHeightMessage); ok { return nil } @@ -89,17 +93,18 @@ func (cs *ConsensusState) readReplayMessage(msg *TimedWALMessage, newStepCh chan return nil } -// replay only those messages since the last block. -// timeoutRoutine should run concurrently to read off tickChan +// Replay only those messages since the last block. `timeoutRoutine` should +// run concurrently to read off tickChan. func (cs *ConsensusState) catchupReplay(csHeight int64) error { - // set replayMode + + // Set replayMode to true so we don't log signing errors. cs.replayMode = true defer func() { cs.replayMode = false }() - // Ensure that ENDHEIGHT for this height doesn't exist. + // Ensure that #ENDHEIGHT for this height doesn't exist. // NOTE: This is just a sanity check. As far as we know things work fine // without it, and Handshake could reuse ConsensusState if it weren't for - // this check (since we can crash after writing ENDHEIGHT). + // this check (since we can crash after writing #ENDHEIGHT). // // Ignore data corruption errors since this is a sanity check. gr, found, err := cs.wal.SearchForEndHeight(csHeight, &WALSearchOptions{IgnoreDataCorruptionErrors: true}) @@ -115,7 +120,7 @@ func (cs *ConsensusState) catchupReplay(csHeight int64) error { return fmt.Errorf("WAL should not contain #ENDHEIGHT %d", csHeight) } - // Search for last height marker + // Search for last height marker. // // Ignore data corruption errors in previous heights because we only care about last height gr, found, err = cs.wal.SearchForEndHeight(csHeight-1, &WALSearchOptions{IgnoreDataCorruptionErrors: true}) @@ -182,10 +187,11 @@ func makeHeightSearchFunc(height int64) auto.SearchFunc { } }*/ -//---------------------------------------------- -// Recover from failure during block processing -// by handshaking with the app to figure out where -// we were last and using the WAL to recover there +//--------------------------------------------------- +// 2. Recover from failure while applying the block. +// (by handshaking with the app to figure out where +// we were last, and using the WAL to recover there.) +//--------------------------------------------------- type Handshaker struct { stateDB dbm.DB @@ -220,7 +226,8 @@ func (h *Handshaker) NBlocks() int { // TODO: retry the handshake/replay if it fails ? func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { - // handshake is done via info request on the query conn + + // Handshake is done via ABCI Info on the query conn. res, err := proxyApp.Query().InfoSync(abci.RequestInfo{version.Version}) if err != nil { return fmt.Errorf("Error calling Info: %v", err) @@ -234,15 +241,16 @@ func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { h.logger.Info("ABCI Handshake", "appHeight", blockHeight, "appHash", fmt.Sprintf("%X", appHash)) - // TODO: check version + // TODO: check app version. - // replay blocks up to the latest in the blockstore + // Replay blocks up to the latest in the blockstore. _, err = h.ReplayBlocks(h.initialState, appHash, blockHeight, proxyApp) if err != nil { return fmt.Errorf("Error on replay: %v", err) } - h.logger.Info("Completed ABCI Handshake - Tendermint and App are synced", "appHeight", blockHeight, "appHash", fmt.Sprintf("%X", appHash)) + h.logger.Info("Completed ABCI Handshake - Tendermint and App are synced", + "appHeight", blockHeight, "appHash", fmt.Sprintf("%X", appHash)) // TODO: (on restart) replay mempool @@ -250,7 +258,7 @@ func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { } // Replay all blocks since appBlockHeight and ensure the result matches the current state. -// Returns the final AppHash or an error +// Returns the final AppHash or an error. func (h *Handshaker) ReplayBlocks(state sm.State, appHash []byte, appBlockHeight int64, proxyApp proxy.AppConns) ([]byte, error) { storeBlockHeight := h.store.Height() @@ -314,7 +322,7 @@ func (h *Handshaker) ReplayBlocks(state sm.State, appHash []byte, appBlockHeight // We haven't run Commit (both the state and app are one block behind), // so replayBlock with the real app. // NOTE: We could instead use the cs.WAL on cs.Start, - // but we'd have to allow the WAL to replay a block that wrote it's ENDHEIGHT + // but we'd have to allow the WAL to replay a block that wrote it's #ENDHEIGHT h.logger.Info("Replay last block using real app") state, err = h.replayBlock(state, storeBlockHeight, proxyApp.Consensus()) return state.AppHash, err diff --git a/consensus/state.go b/consensus/state.go index bee7efa2e..7592269bf 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1210,23 +1210,28 @@ func (cs *ConsensusState) finalizeCommit(height int64) { fail.Fail() // XXX - // Finish writing to the WAL for this height. - // NOTE: If we fail before writing this, we'll never write it, - // and just recover by running ApplyBlock in the Handshake. - // If we moved it before persisting the block, we'd have to allow - // WAL replay for blocks with an #ENDHEIGHT - // As is, ConsensusState should not be started again - // until we successfully call ApplyBlock (ie. here or in Handshake after restart) + // Write EndHeightMessage{} for this height, implying that the blockstore + // has saved the block. + // + // If we crash before writing this EndHeightMessage{}, we will recover by + // running ApplyBlock during the ABCI handshake when we restart. If we + // didn't save the block to the blockstore before writing + // EndHeightMessage{}, we'd have to change WAL replay -- currently it + // complains about replaying for heights where an #ENDHEIGHT entry already + // exists. + // + // Either way, the ConsensusState should not be resumed until we + // successfully call ApplyBlock (ie. later here, or in Handshake after + // restart). cs.wal.Save(EndHeightMessage{height}) fail.Fail() // XXX - // Create a copy of the state for staging - // and an event cache for txs + // Create a copy of the state for staging and an event cache for txs. stateCopy := cs.state.Copy() // Execute and commit the block, update and save the state, and update the mempool. - // NOTE: the block.AppHash wont reflect these txs until the next block + // NOTE The block.AppHash wont reflect these txs until the next block. var err error stateCopy, err = cs.blockExec.ApplyBlock(stateCopy, types.BlockID{block.Hash(), blockParts.Header()}, block) if err != nil {