|
|
-
- \section{Tendermint consensus algorithm} \label{sec:tendermint}
-
- \newcommand\Disseminate{\textbf{Disseminate}}
-
- \newcommand\Proposal{\mathsf{PROPOSAL}}
- \newcommand\ProposalPart{\mathsf{PROPOSAL\mbox{-}PART}}
- \newcommand\PrePrepare{\mathsf{INIT}} \newcommand\Prevote{\mathsf{PREVOTE}}
- \newcommand\Precommit{\mathsf{PRECOMMIT}}
- \newcommand\Decision{\mathsf{DECISION}}
-
- \newcommand\ViewChange{\mathsf{VC}}
- \newcommand\ViewChangeAck{\mathsf{VC\mbox{-}ACK}}
- \newcommand\NewPrePrepare{\mathsf{VC\mbox{-}INIT}}
- \newcommand\coord{\mathsf{proposer}}
-
- \newcommand\newHeight{newHeight} \newcommand\newRound{newRound}
- \newcommand\nil{nil} \newcommand\id{id} \newcommand{\propose}{propose}
- \newcommand\prevote{prevote} \newcommand\prevoteWait{prevoteWait}
- \newcommand\precommit{precommit} \newcommand\precommitWait{precommitWait}
- \newcommand\commit{commit}
-
- \newcommand\timeoutPropose{timeoutPropose}
- \newcommand\timeoutPrevote{timeoutPrevote}
- \newcommand\timeoutPrecommit{timeoutPrecommit}
- \newcommand\proofOfLocking{proof\mbox{-}of\mbox{-}locking}
-
- \begin{algorithm}[htb!] \def\baselinestretch{1} \scriptsize\raggedright
- \begin{algorithmic}[1]
- \SHORTSPACE
- \INIT{}
- \STATE $h_p := 0$
- \COMMENT{current height, or consensus instance we are currently executing}
- \STATE $round_p := 0$ \COMMENT{current round number}
- \STATE $step_p \in \set{\propose, \prevote, \precommit}$
- \STATE $decision_p[] := nil$
- \STATE $lockedValue_p := nil$
- \STATE $lockedRound_p := -1$
- \STATE $validValue_p := nil$
- \STATE $validRound_p := -1$
- \ENDINIT
- \SHORTSPACE
- \STATE \textbf{upon} start \textbf{do} $StartRound(0)$
- \SHORTSPACE
- \FUNCTION{$StartRound(round)$} \label{line:tab:startRound}
- \STATE $round_p \assign round$
- \STATE $step_p \assign \propose$
- \IF{$\coord(h_p, round_p) = p$}
- \IF{$validValue_p \neq \nil$} \label{line:tab:isThereLockedValue}
- \STATE $proposal \assign validValue_p$ \ELSE \STATE $proposal \assign
- getValue()$
- \label{line:tab:getValidValue}
- \ENDIF
- \STATE \Broadcast\ $\li{\Proposal,h_p, round_p, proposal, validRound_p}$
- \label{line:tab:send-proposal}
- \ELSE
- \STATE \textbf{schedule} $OnTimeoutPropose(h_p,
- round_p)$ to be executed \textbf{after} $\timeoutPropose(round_p)$
- \ENDIF
- \ENDFUNCTION
-
- \SPACE
- \UPON{$\li{\Proposal,h_p,round_p, v, -1}$ \From\ $\coord(h_p,round_p)$
- \With\ $step_p = \propose$} \label{line:tab:recvProposal}
- \IF{$valid(v) \wedge (lockedRound_p = -1 \vee lockedValue_p = v)$}
- \label{line:tab:accept-proposal-2}
- \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,id(v)}$
- \label{line:tab:prevote-proposal}
- \ELSE
- \label{line:tab:acceptProposal1}
- \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,\nil}$
- \label{line:tab:prevote-nil}
- \ENDIF
- \STATE $step_p \assign \prevote$ \label{line:tab:setStateToPrevote1}
- \ENDUPON
-
- \SPACE
- \UPON{$\li{\Proposal,h_p,round_p, v, vr}$ \From\ $\coord(h_p,round_p)$
- \textbf{AND} $2f+1$ $\li{\Prevote,h_p, vr,id(v)}$ \With\ $step_p = \propose \wedge (vr \ge 0 \wedge vr < round_p)$}
- \label{line:tab:acceptProposal}
- \IF{$valid(v) \wedge (lockedRound_p \le vr
- \vee lockedValue_p = v)$} \label{line:tab:cond-prevote-higher-proposal}
- \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,id(v)}$
- \label{line:tab:prevote-higher-proposal}
- \ELSE
- \label{line:tab:acceptProposal2}
- \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,\nil}$
- \label{line:tab:prevote-nil2}
- \ENDIF
- \STATE $step_p \assign \prevote$ \label{line:tab:setStateToPrevote3}
- \ENDUPON
-
- \SPACE
- \UPON{$2f+1$ $\li{\Prevote,h_p, round_p,*}$ \With\ $step_p = \prevote$ for the first time}
- \label{line:tab:recvAny2/3Prevote}
- \STATE \textbf{schedule} $OnTimeoutPrevote(h_p, round_p)$ to be executed \textbf{after} $\timeoutPrevote(round_p)$ \label{line:tab:timeoutPrevote}
- \ENDUPON
-
- \SPACE
- \UPON{$\li{\Proposal,h_p,round_p, v, *}$ \From\ $\coord(h_p,round_p)$
- \textbf{AND} $2f+1$ $\li{\Prevote,h_p, round_p,id(v)}$ \With\ $valid(v) \wedge step_p \ge \prevote$ for the first time}
- \label{line:tab:recvPrevote}
- \IF{$step_p = \prevote$}
- \STATE $lockedValue_p \assign v$ \label{line:tab:setLockedValue}
- \STATE $lockedRound_p \assign round_p$ \label{line:tab:setLockedRound}
- \STATE \Broadcast \ $\li{\Precommit,h_p,round_p,id(v)}$
- \label{line:tab:precommit-v}
- \STATE $step_p \assign \precommit$ \label{line:tab:setStateToCommit}
- \ENDIF
- \STATE $validValue_p \assign v$ \label{line:tab:setValidRound}
- \STATE $validRound_p \assign round_p$ \label{line:tab:setValidValue}
- \ENDUPON
-
- \SHORTSPACE
- \UPON{$2f+1$ $\li{\Prevote,h_p,round_p, \nil}$
- \With\ $step_p = \prevote$}
- \STATE \Broadcast \ $\li{\Precommit,h_p,round_p, \nil}$
- \label{line:tab:precommit-v-1}
- \STATE $step_p \assign \precommit$
- \ENDUPON
-
- \SPACE
- \UPON{$2f+1$ $\li{\Precommit,h_p,round_p,*}$ for the first time}
- \label{line:tab:startTimeoutPrecommit}
- \STATE \textbf{schedule} $OnTimeoutPrecommit(h_p, round_p)$ to be executed \textbf{after} $\timeoutPrecommit(round_p)$
-
- \ENDUPON
-
- \SPACE
- \UPON{$\li{\Proposal,h_p,r, v, *}$ \From\ $\coord(h_p,r)$ \textbf{AND}
- $2f+1$ $\li{\Precommit,h_p,r,id(v)}$ \With\ $decision_p[h_p] = \nil$}
- \label{line:tab:onDecideRule}
- \IF{$valid(v)$} \label{line:tab:validDecisionValue}
- \STATE $decision_p[h_p] \assign v$ \label{line:tab:decide}
- \STATE$h_p \assign h_p + 1$ \label{line:tab:increaseHeight}
- \STATE reset $lockedRound_p$, $lockedValue_p$, $validRound_p$ and $validValue_p$ to initial values
- and empty message log
- \STATE $StartRound(0)$
- \ENDIF
- \ENDUPON
-
- \SHORTSPACE
- \UPON{$f+1$ $\li{*,h_p,round, *, *}$ \textbf{with} $round > round_p$}
- \label{line:tab:skipRounds}
- \STATE $StartRound(round)$ \label{line:tab:nextRound2}
- \ENDUPON
-
- \SHORTSPACE
- \FUNCTION{$OnTimeoutPropose(height,round)$} \label{line:tab:onTimeoutPropose}
- \IF{$height = h_p \wedge round = round_p \wedge step_p = \propose$}
- \STATE \Broadcast \ $\li{\Prevote,h_p,round_p, \nil}$
- \label{line:tab:prevote-nil-on-timeout}
- \STATE $step_p \assign \prevote$
- \ENDIF
- \ENDFUNCTION
-
- \SHORTSPACE
- \FUNCTION{$OnTimeoutPrevote(height,round)$} \label{line:tab:onTimeoutPrevote}
- \IF{$height = h_p \wedge round = round_p \wedge step_p = \prevote$}
- \STATE \Broadcast \ $\li{\Precommit,h_p,round_p,\nil}$
- \label{line:tab:precommit-nil-onTimeout}
- \STATE $step_p \assign \precommit$
- \ENDIF
- \ENDFUNCTION
-
- \SHORTSPACE
- \FUNCTION{$OnTimeoutPrecommit(height,round)$} \label{line:tab:onTimeoutPrecommit}
- \IF{$height = h_p \wedge round = round_p$}
- \STATE $StartRound(round_p + 1)$ \label{line:tab:nextRound}
- \ENDIF
- \ENDFUNCTION
- \end{algorithmic} \caption{Tendermint consensus algorithm}
- \label{alg:tendermint}
- \end{algorithm}
-
- In this section we present the Tendermint Byzantine fault-tolerant consensus
- algorithm. The algorithm is specified by the pseudo-code shown in
- Algorithm~\ref{alg:tendermint}. We present the algorithm as a set of \emph{upon
- rules} that are executed atomically\footnote{In case several rules are active
- at the same time, the first rule to be executed is picked randomly. The
- correctness of the algorithm does not depend on the order in which rules are
- executed.}. We assume that processes exchange protocol messages using a gossip
- protocol and that both sent and received messages are stored in a local message
- log for every process. An upon rule is triggered once the message log contains
- messages such that the corresponding condition evaluates to $\mathtt{true}$. The
- condition that assumes reception of $X$ messages of a particular type and
- content denotes reception of messages whose senders have aggregate voting power at
- least equal to $X$. For example, the condition $2f+1$ $\li{\Precommit,h_p,r,id(v)}$,
- evaluates to true upon reception of $\Precommit$ messages for height $h_p$,
- a round $r$ and with value equal to $id(v)$ whose senders have aggregate voting
- power at least equal to $2f+1$. Some of the rules end with a ``for the first time'' constraint
- to denote that they are triggered only the first time the corresponding condition evaluates
- to $\mathtt{true}$. This is because those rules do not always change the state of the algorithm
- variables, so without this constraint the algorithm could keep
- executing those rules forever. The variables with index $p$ are process local state
- variables, while variables without index $p$ are value placeholders. The sign
- $*$ denotes any value.
-
- We denote with $n$ the total voting power of processes in the system, and we
- assume that the total voting power of faulty processes in the system is bounded
- with a system parameter $f$. The algorithm assumes that $n > 3f$, i.e., it
- requires that the total voting power of faulty processes is smaller than one
- third of the total voting power. For simplicity we present the algorithm for
- the case $n = 3f + 1$.
-
- The algorithm proceeds in rounds, where each round has a dedicated
- \emph{proposer}. The mapping of rounds to proposers is known to all processes
- and is given as a function $\coord(h, round)$, returning the proposer for
- the round $round$ in the consensus instance $h$. We
- assume that the proposer selection function is weighted round-robin, where
- processes are rotated proportional to their voting power\footnote{A validator
- with more voting power is selected more frequently, proportional to its power.
- More precisely, during a sequence of rounds of size $n$, every process is
- proposer in a number of rounds equal to its voting power.}.
- The internal protocol state transitions are triggered by message reception and
- by expiration of timeouts. There are three timeouts in Algorithm \ref{alg:tendermint}:
- $\timeoutPropose$, $\timeoutPrevote$ and $\timeoutPrecommit$.
- The timeouts prevent the algorithm from blocking and
- waiting forever for some condition to be true, ensure that processes continuously
- transition between rounds, and guarantee that eventually (after GST) communication
- between correct processes is timely and reliable so they can decide.
- The last role is achieved by increasing the timeouts with every new round $r$,
- i.e., $timeoutX(r) = initTimeoutX + r \cdot timeoutDelta$;
- they are reset for every new height (consensus
- instance).
-
- Processes exchange the following messages in Tendermint: $\Proposal$,
- $\Prevote$ and $\Precommit$. The $\Proposal$ message is used by the proposer of
- the current round to suggest a potential decision value, while $\Prevote$ and
- $\Precommit$ are votes for a proposed value. According to the classification of
- consensus algorithms from \cite{RMS10:dsn}, Tendermint, like PBFT
- \cite{CL02:tcs} and DLS \cite{DLS88:jacm}, belongs to class 3, so it requires
- two voting steps (three communication exchanges in total) to decide a value.
- The Tendermint consensus algorithm is designed for the blockchain context where
- the value to decide is a block of transactions (i.e., it is potentially quite
- large, consisting of many transactions). Therefore, in
- Algorithm~\ref{alg:tendermint} (similarly to \cite{CL02:tcs}) we are explicit about
- sending a value (block of transactions) and a small, constant size value id (a
- unique value identifier, normally a hash of the value, i.e., if $\id(v) =
- \id(v')$, then $v=v'$). The $\Proposal$ message is the only one carrying the
- value; $\Prevote$ and $\Precommit$ messages carry the value id. A correct
- process decides on a value $v$ in Tendermint upon receiving the $\Proposal$ for
- $v$ and $2f+1$ voting-power equivalent $\Precommit$ messages for $\id(v)$ in
- some round $r$. In order to send $\Precommit$ message for $v$ in a round $r$, a
- correct process waits to receive the $\Proposal$ and $2f+1$ of the
- corresponding $\Prevote$ messages in the round $r$. Otherwise,
- it sends $\Precommit$ message with a special $\nil$ value.
- This ensures that correct processes can $\Precommit$ only a
- single value (or $\nil$) in a round. As
- proposers may be faulty, the proposed value is treated by correct processes as
- a suggestion (it is not blindly accepted), and a correct process tells others
- if it accepted the $\Proposal$ for value $v$ by sending $\Prevote$ message for
- $\id(v)$; otherwise it sends $\Prevote$ message with the special $\nil$ value.
-
- Every process maintains the following variables in the Algorithm
- \ref{alg:tendermint}: $step$, $lockedValue$, $lockedRound$, $validValue$ and
- $validRound$. The $step$ denotes the current state of the internal Tendermint
- state machine, i.e., it reflects the stage of the algorithm execution in the
- current round. The $lockedValue$ stores the most recent value (with respect to
- a round number) for which a $\Precommit$ message has been sent. The
- $lockedRound$ is the last round in which the process sent a $\Precommit$
- message that is not $\nil$. We also say that a correct process locks a value
- $v$ in a round $r$ by setting $lockedValue = v$ and $lockedRound = r$ before
- sending $\Precommit$ message for $\id(v)$. As a correct process can decide a
- value $v$ only if $2f+1$ $\Precommit$ messages for $\id(v)$ are received, this
- implies that a possible decision value is a value that is locked by at least
- $f+1$ voting power equivalent of correct processes. Therefore, any value $v$
- for which $\Proposal$ and $2f+1$ of the corresponding $\Prevote$ messages are
- received in some round $r$ is a \emph{possible decision} value. The role of the
- $validValue$ variable is to store the most recent possible decision value; the
- $validRound$ is the last round in which $validValue$ is updated. Apart from
- those variables, a process also stores the current consensus instance ($h_p$,
- called \emph{height} in Tendermint), and the current round number ($round_p$)
- and attaches them to every message. Finally, a process also stores an array of
- decisions, $decision_p$ (Tendermint assumes a sequence of consensus instances,
- one for each height).
-
- Every round starts by a proposer suggesting a value with the $\Proposal$
- message (see line \ref{line:tab:send-proposal}). In the initial round of each
- height, the proposer is free to choose the value to suggest. In the
- Algorithm~\ref{alg:tendermint}, a correct process obtains a value to propose
- using an external function $getValue()$ that returns a valid value to
- propose. In the following rounds, a correct proposer will suggest a new value
- only if $validValue = \nil$; otherwise $validValue$ is proposed (see
- lines~\ref{line:tab:isThereLockedValue}-\ref{line:tab:getValidValue}).
- In addition to the value proposed, the $\Proposal$ message also
- contains the $validRound$ so other processes are informed about the last round
- in which the proposer observed $validValue$ as a possible decision value.
- Note that if a correct proposer $p$ sends $validValue$ with the $validRound$ in the
- $\Proposal$, this implies that the process $p$ received $\Proposal$ and the
- corresponding $2f+1$ $\Prevote$ messages for $validValue$ in the round
- $validRound$.
- If a correct process sends $\Proposal$ message with $validValue$ ($validRound > -1$)
- at time $t > GST$, by the \emph{Gossip communication} property, the
- corresponding $\Proposal$ and the $\Prevote$ messages will be received by all
- correct processes before time $t+\Delta$. Therefore, all correct processes will
- be able to verify the correctness of the suggested value as it is supported by
- the $\Proposal$ and the corresponding $2f+1$ voting power equivalent $\Prevote$
- messages.
-
- A correct process $p$ accepts the proposal for a value $v$ (sends $\Prevote$
- for $id(v)$) if an external \emph{valid} function returns $true$ for the value
- $v$, and if $p$ hasn't locked any value ($lockedRound = -1$) or $p$ has locked
- the value $v$ ($lockedValue = v$); see the line
- \ref{line:tab:accept-proposal-2}. In case the proposed pair is $(v,vr \ge 0)$ and a
- correct process $p$ has locked some value, it will accept
- $v$ if it is a more recent possible decision value\footnote{As
- explained above, the possible decision value in a round $r$ is the one for
- which $\Proposal$ and the corresponding $2f+1$ $\Prevote$ messages are received
- for the round $r$.}, $vr > lockedRound_p$, or if $lockedValue = v$
- (see line~\ref{line:tab:cond-prevote-higher-proposal}). Otherwise, a correct
- process will reject the proposal by sending $\Prevote$ message with $\nil$
- value. A correct process will send $\Prevote$ message with $\nil$ value also in
- case $\timeoutPropose$ expired (it is triggered when a correct process starts a
- new round) and a process has not sent $\Prevote$ message in the current round
- yet (see the line \ref{line:tab:onTimeoutPropose}).
-
- If a correct process receives $\Proposal$ message for some value $v$ and $2f+1$
- $\Prevote$ messages for $\id(v)$, then it sends $\Precommit$ message with
- $\id(v)$. Otherwise, it sends $\Precommit$ $\nil$. A correct process will send
- $\Precommit$ message with $\nil$ value also in case $\timeoutPrevote$ expired
- (it is started when a correct process sent $\Prevote$ message and received any
- $2f+1$ $\Prevote$ messages) and a process has not sent $\Precommit$ message in
- the current round yet (see the line \ref{line:tab:onTimeoutPrevote}). A
- correct process decides on some value $v$ if it receives in some round $r$
- $\Proposal$ message for $v$ and $2f+1$ $\Precommit$ messages with $\id(v)$ (see
- the line \ref{line:tab:decide}). To prevent the algorithm from blocking and
- waiting forever for this condition to be true, the Algorithm
- \ref{alg:tendermint} relies on $\timeoutPrecommit$. It is triggered after a
- process receives any set of $2f+1$ $\Precommit$ messages for the current round.
- If the $\timeoutPrecommit$ expires and a process has not decided yet, the
- process starts the next round (see the line \ref{line:tab:onTimeoutPrecommit}).
- When a correct process $p$ decides, it starts the next consensus instance
- (for the next height). The \emph{Gossip communication} property ensures
- that $\Proposal$ and $2f+1$ $\Precommit$ messages that led $p$ to decide
- are eventually received by all correct processes, so they will also decide.
-
- \subsection{Termination mechanism}
-
- Tendermint ensures termination by a novel mechanism that benefits from the
- gossip based nature of communication (see \emph{Gossip communication}
- property). It requires managing two additional variables, $validValue$ and
- $validRound$ that are then used by the proposer during the propose step as
- explained above. The $validValue$ and $validRound$ are updated to $v$ and $r$
- by a correct process in a round $r$ when the process receives valid $\Proposal$
- message for the value $v$ and the corresponding $2f+1$ $\Prevote$ messages for
- $id(v)$ in the round $r$ (see the rule at line~\ref{line:tab:recvPrevote}).
-
- We now give briefly the intuition how managing and proposing $validValue$
- and $validRound$ ensures termination. Formal treatment is left for
- Section~\ref{sec:proof}.
-
- The first thing to note is that during a good period, because of the
- \emph{Gossip communication} property, if a correct process $p$ locks a value
- $v$ in some round $r$, all correct processes will update $validValue$ to $v$
- and $validRound$ to $r$ before the end of the round $r$ (we prove this formally
- in Section~\ref{sec:proof}). The intuition is that messages that led to $p$
- locking a value $v$ in the round $r$ will be gossiped to all correct processes
- before the end of the round $r$, so they will update $validValue$ and
- $validRound$ (the line~\ref{line:tab:recvPrevote}). Therefore, if a correct
- process locks some value during a good period, $validValue$ and $validRound$ are
- updated by all correct processes so that the value proposed in the following
- rounds will be acceptable by all correct processes. Note
- that it could happen that during a good period, no correct process locks a value,
- but some correct process $q$ updates $validValue$ and $validRound$ during some
- round. As no correct process locks a value in this case, $validValue_q$ and
- $validRound_q$ will also be acceptable by all correct processes as
- $validRound_q > lockedRound_c$ for every correct process $c$ and as the
- \emph{Gossip communication} property ensures that the corresponding $\Prevote$
- messages that $q$ received in the round $validRound_q$ are received by all
- correct processes $\Delta$ time later.
-
- Finally, it could happen that after GST, there is a long sequence of rounds in which
- no correct process either locks a value or updates $validValue$ and $validRound$.
- In this case, during this sequence of rounds, the proposed value suggested by correct
- processes was not accepted by all correct processes. Note that this sequence of rounds
- is always finite as at the beginning of every
- round there is at least a single correct process $c$ such that $validValue_c$
- and $validRound_c$ are acceptable by every correct process. This is true as
- there exists a correct process $c$ such that for every other correct process
- $p$, $validRound_c > lockedRound_p$ or $validValue_c = lockedValue_p$. This is
- true as $c$ is the process that has locked a value in the most recent round
- among all correct processes (or no correct process locked any value). Therefore,
- eventually $c$ will be the proposer in some round and the proposed value will be accepted
- by all correct processes, terminating therefore this sequence of
- rounds.
-
- Therefore, updating $validValue$ and $validRound$ variables, and the
- \emph{Gossip communication} property, together ensure that eventually, during
- the good period, there exists a round with a correct proposer whose proposed
- value will be accepted by all correct processes, and all correct processes will
- terminate in that round. Note that this mechanism, contrary to the common
- termination mechanism illustrated in the
- Figure~\ref{ch3:fig:coordinator-change}, does not require exchanging any
- additional information beyond the messages already sent as part of what is
- normally called the ``normal'' case.
-
|