You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

397 lines
21 KiB

\section{Tendermint consensus algorithm} \label{sec:tendermint}
\newcommand\Disseminate{\textbf{Disseminate}}
\newcommand\Proposal{\mathsf{PROPOSAL}}
\newcommand\ProposalPart{\mathsf{PROPOSAL\mbox{-}PART}}
\newcommand\PrePrepare{\mathsf{INIT}} \newcommand\Prevote{\mathsf{PREVOTE}}
\newcommand\Precommit{\mathsf{PRECOMMIT}}
\newcommand\Decision{\mathsf{DECISION}}
\newcommand\ViewChange{\mathsf{VC}}
\newcommand\ViewChangeAck{\mathsf{VC\mbox{-}ACK}}
\newcommand\NewPrePrepare{\mathsf{VC\mbox{-}INIT}}
\newcommand\coord{\mathsf{proposer}}
\newcommand\newHeight{newHeight} \newcommand\newRound{newRound}
\newcommand\nil{nil} \newcommand\id{id} \newcommand{\propose}{propose}
\newcommand\prevote{prevote} \newcommand\prevoteWait{prevoteWait}
\newcommand\precommit{precommit} \newcommand\precommitWait{precommitWait}
\newcommand\commit{commit}
\newcommand\timeoutPropose{timeoutPropose}
\newcommand\timeoutPrevote{timeoutPrevote}
\newcommand\timeoutPrecommit{timeoutPrecommit}
\newcommand\proofOfLocking{proof\mbox{-}of\mbox{-}locking}
\begin{algorithm}[htb!] \def\baselinestretch{1} \scriptsize\raggedright
\begin{algorithmic}[1]
\SHORTSPACE
\INIT{}
\STATE $h_p := 0$
\COMMENT{current height, or consensus instance we are currently executing}
\STATE $round_p := 0$ \COMMENT{current round number}
\STATE $step_p \in \set{\propose, \prevote, \precommit}$
\STATE $decision_p[] := nil$
\STATE $lockedValue_p := nil$
\STATE $lockedRound_p := -1$
\STATE $validValue_p := nil$
\STATE $validRound_p := -1$
\ENDINIT
\SHORTSPACE
\STATE \textbf{upon} start \textbf{do} $StartRound(0)$
\SHORTSPACE
\FUNCTION{$StartRound(round)$} \label{line:tab:startRound}
\STATE $round_p \assign round$
\STATE $step_p \assign \propose$
\IF{$\coord(h_p, round_p) = p$}
\IF{$validValue_p \neq \nil$} \label{line:tab:isThereLockedValue}
\STATE $proposal \assign validValue_p$ \ELSE \STATE $proposal \assign
getValue()$
\label{line:tab:getValidValue}
\ENDIF
\STATE \Broadcast\ $\li{\Proposal,h_p, round_p, proposal, validRound_p}$
\label{line:tab:send-proposal}
\ELSE
\STATE \textbf{schedule} $OnTimeoutPropose(h_p,
round_p)$ to be executed \textbf{after} $\timeoutPropose(round_p)$
\ENDIF
\ENDFUNCTION
\SPACE
\UPON{$\li{\Proposal,h_p,round_p, v, -1}$ \From\ $\coord(h_p,round_p)$
\With\ $step_p = \propose$} \label{line:tab:recvProposal}
\IF{$valid(v) \wedge (lockedRound_p = -1 \vee lockedValue_p = v$)}
\label{line:tab:accept-proposal-2}
\STATE \Broadcast \ $\li{\Prevote,h_p,round_p,id(v)}$
\label{line:tab:prevote-proposal}
\ELSE
\label{line:tab:acceptProposal1}
\STATE \Broadcast \ $\li{\Prevote,h_p,round_p,\nil}$
\label{line:tab:prevote-nil}
\ENDIF
\STATE $step_p \assign \prevote$ \label{line:tab:setStateToPrevote1}
\ENDUPON
\SPACE
\UPON{$\li{\Proposal,h_p,round_p, v, vr}$ \From\ $\coord(h_p,round_p)$
\textbf{AND} $2f+1$ $\li{\Prevote,h_p, vr,id(v)}$ \With\ $step_p = \propose \wedge (vr \ge 0 \wedge vr < round_p)$}
\label{line:tab:acceptProposal}
\IF{$valid(v) \wedge (lockedRound_p \le vr
\vee lockedValue_p = v)$} \label{line:tab:cond-prevote-higher-proposal}
\STATE \Broadcast \ $\li{\Prevote,h_p,round_p,id(v)}$
\label{line:tab:prevote-higher-proposal}
\ELSE
\label{line:tab:acceptProposal2}
\STATE \Broadcast \ $\li{\Prevote,h_p,round_p,\nil}$
\label{line:tab:prevote-nil2}
\ENDIF
\STATE $step_p \assign \prevote$ \label{line:tab:setStateToPrevote3}
\ENDUPON
\SPACE
\UPON{$2f+1$ $\li{\Prevote,h_p, round_p,*}$ \With\ $step_p = \prevote$ for the first time}
\label{line:tab:recvAny2/3Prevote}
\STATE \textbf{schedule} $OnTimeoutPrevote(h_p, round_p)$ to be executed \textbf{after} $\timeoutPrevote(round_p)$ \label{line:tab:timeoutPrevote}
\ENDUPON
\SPACE
\UPON{$\li{\Proposal,h_p,round_p, v, *}$ \From\ $\coord(h_p,round_p)$
\textbf{AND} $2f+1$ $\li{\Prevote,h_p, round_p,id(v)}$ \With\ $valid(v) \wedge step_p \ge \prevote$ for the first time}
\label{line:tab:recvPrevote}
\IF{$step_p = \prevote$}
\STATE $lockedValue_p \assign v$ \label{line:tab:setLockedValue}
\STATE $lockedRound_p \assign round_p$ \label{line:tab:setLockedRound}
\STATE \Broadcast \ $\li{\Precommit,h_p,round_p,id(v))}$
\label{line:tab:precommit-v}
\STATE $step_p \assign \precommit$ \label{line:tab:setStateToCommit}
\ENDIF
\STATE $validValue_p \assign v$ \label{line:tab:setValidRound}
\STATE $validRound_p \assign round_p$ \label{line:tab:setValidValue}
\ENDUPON
\SHORTSPACE
\UPON{$2f+1$ $\li{\Prevote,h_p,round_p, \nil}$
\With\ $step_p = \prevote$}
\STATE \Broadcast \ $\li{\Precommit,h_p,round_p, \nil}$
\label{line:tab:precommit-v-1}
\STATE $step_p \assign \precommit$
\ENDUPON
\SPACE
\UPON{$2f+1$ $\li{\Precommit,h_p,round_p,*}$ for the first time}
\label{line:tab:startTimeoutPrecommit}
\STATE \textbf{schedule} $OnTimeoutPrecommit(h_p, round_p)$ to be executed \textbf{after} $\timeoutPrecommit(round_p)$
\ENDUPON
\SPACE
\UPON{$\li{\Proposal,h_p,r, v, *}$ \From\ $\coord(h_p,r)$ \textbf{AND}
$2f+1$ $\li{\Precommit,h_p,r,id(v)}$ \With\ $decision_p[h_p] = \nil$}
\label{line:tab:onDecideRule}
\IF{$valid(v)$} \label{line:tab:validDecisionValue}
\STATE $decision_p[h_p] = v$ \label{line:tab:decide}
\STATE$h_p \assign h_p + 1$ \label{line:tab:increaseHeight}
\STATE reset $lockedRound_p$, $lockedValue_p$, $validRound_p$ and $validValue_p$ to initial values
and empty message log
\STATE $StartRound(0)$
\ENDIF
\ENDUPON
\SHORTSPACE
\UPON{$f+1$ $\li{*,h_p,round, *, *}$ \textbf{with} $round > round_p$}
\label{line:tab:skipRounds}
\STATE $StartRound(round)$ \label{line:tab:nextRound2}
\ENDUPON
\SHORTSPACE
\FUNCTION{$OnTimeoutPropose(height,round)$} \label{line:tab:onTimeoutPropose}
\IF{$height = h_p \wedge round = round_p \wedge step_p = \propose$}
\STATE \Broadcast \ $\li{\Prevote,h_p,round_p, \nil}$
\label{line:tab:prevote-nil-on-timeout}
\STATE $step_p \assign \prevote$
\ENDIF
\ENDFUNCTION
\SHORTSPACE
\FUNCTION{$OnTimeoutPrevote(height,round)$} \label{line:tab:onTimeoutPrevote}
\IF{$height = h_p \wedge round = round_p \wedge step_p = \prevote$}
\STATE \Broadcast \ $\li{\Precommit,h_p,round_p,\nil}$
\label{line:tab:precommit-nil-onTimeout}
\STATE $step_p \assign \precommit$
\ENDIF
\ENDFUNCTION
\SHORTSPACE
\FUNCTION{$OnTimeoutPrecommit(height,round)$} \label{line:tab:onTimeoutPrecommit}
\IF{$height = h_p \wedge round = round_p$}
\STATE $StartRound(round_p + 1)$ \label{line:tab:nextRound}
\ENDIF
\ENDFUNCTION
\end{algorithmic} \caption{Tendermint consensus algorithm}
\label{alg:tendermint}
\end{algorithm}
In this section we present the Tendermint Byzantine fault-tolerant consensus
algorithm. The algorithm is specified by the pseudo-code shown in
Algorithm~\ref{alg:tendermint}. We present the algorithm as a set of \emph{upon
rules} that are executed atomically\footnote{In case several rules are active
at the same time, the first rule to be executed is picked randomly. The
correctness of the algorithm does not depend on the order in which rules are
executed.}. We assume that processes exchange protocol messages using a gossip
protocol and that both sent and received messages are stored in a local message
log for every process. An upon rule is triggered once the message log contains
messages such that the corresponding condition evaluates to $\tt{true}$. The
condition that assumes reception of $X$ messages of a particular type and
content denotes reception of messages whose senders have aggregate voting power at
least equal to $X$. For example, the condition $2f+1$ $\li{\Precommit,h_p,r,id(v)}$,
evaluates to true upon reception of $\Precommit$ messages for height $h_p$,
a round $r$ and with value equal to $id(v)$ whose senders have aggregate voting
power at least equal to $2f+1$. Some of the rules ends with "for the first time" constraint
to denote that it is triggered only the first time a corresponding condition evaluates
to $\tt{true}$. This is because those rules do not always change the state of algorithm
variables so without this constraint, the algorithm could keep
executing those rules forever. The variables with index $p$ are process local state
variables, while variables without index $p$ are value placeholders. The sign
$*$ denotes any value.
We denote with $n$ the total voting power of processes in the system, and we
assume that the total voting power of faulty processes in the system is bounded
with a system parameter $f$. The algorithm assumes that $n > 3f$, i.e., it
requires that the total voting power of faulty processes is smaller than one
third of the total voting power. For simplicity we present the algorithm for
the case $n = 3f + 1$.
The algorithm proceeds in rounds, where each round has a dedicated
\emph{proposer}. The mapping of rounds to proposers is known to all processes
and is given as a function $\coord(h, round)$, returning the proposer for
the round $round$ in the consensus instance $h$. We
assume that the proposer selection function is weighted round-robin, where
processes are rotated proportional to their voting power\footnote{A validator
with more voting power is selected more frequently, proportional to its power.
More precisely, during a sequence of rounds of size $n$, every process is
proposer in a number of rounds equal to its voting power.}.
The internal protocol state transitions are triggered by message reception and
by expiration of timeouts. There are three timeouts in Algorithm \ref{alg:tendermint}:
$\timeoutPropose$, $\timeoutPrevote$ and $\timeoutPrecommit$.
The timeouts prevent the algorithm from blocking and
waiting forever for some condition to be true, ensure that processes continuously
transition between rounds, and guarantee that eventually (after GST) communication
between correct processes is timely and reliable so they can decide.
The last role is achieved by increasing the timeouts with every new round $r$,
i.e, $timeoutX(r) = initTimeoutX + r*timeoutDelta$;
they are reset for every new height (consensus
instance).
Processes exchange the following messages in Tendermint: $\Proposal$,
$\Prevote$ and $\Precommit$. The $\Proposal$ message is used by the proposer of
the current round to suggest a potential decision value, while $\Prevote$ and
$\Precommit$ are votes for a proposed value. According to the classification of
consensus algorithms from \cite{RMS10:dsn}, Tendermint, like PBFT
\cite{CL02:tcs} and DLS \cite{DLS88:jacm}, belongs to class 3, so it requires
two voting steps (three communication exchanges in total) to decide a value.
The Tendermint consensus algorithm is designed for the blockchain context where
the value to decide is a block of transactions (ie. it is potentially quite
large, consisting of many transactions). Therefore, in the Algorithm
\ref{alg:tendermint} (similar as in \cite{CL02:tcs}) we are explicit about
sending a value (block of transactions) and a small, constant size value id (a
unique value identifier, normally a hash of the value, i.e., if $\id(v) =
\id(v')$, then $v=v'$). The $\Proposal$ message is the only one carrying the
value; $\Prevote$ and $\Precommit$ messages carry the value id. A correct
process decides on a value $v$ in Tendermint upon receiving the $\Proposal$ for
$v$ and $2f+1$ voting-power equivalent $\Precommit$ messages for $\id(v)$ in
some round $r$. In order to send $\Precommit$ message for $v$ in a round $r$, a
correct process waits to receive the $\Proposal$ and $2f+1$ of the
corresponding $\Prevote$ messages in the round $r$. Otherwise,
it sends $\Precommit$ message with a special $\nil$ value.
This ensures that correct processes can $\Precommit$ only a
single value (or $\nil$) in a round. As
proposers may be faulty, the proposed value is treated by correct processes as
a suggestion (it is not blindly accepted), and a correct process tells others
if it accepted the $\Proposal$ for value $v$ by sending $\Prevote$ message for
$\id(v)$; otherwise it sends $\Prevote$ message with the special $\nil$ value.
Every process maintains the following variables in the Algorithm
\ref{alg:tendermint}: $step$, $lockedValue$, $lockedRound$, $validValue$ and
$validRound$. The $step$ denotes the current state of the internal Tendermint
state machine, i.e., it reflects the stage of the algorithm execution in the
current round. The $lockedValue$ stores the most recent value (with respect to
a round number) for which a $\Precommit$ message has been sent. The
$lockedRound$ is the last round in which the process sent a $\Precommit$
message that is not $\nil$. We also say that a correct process locks a value
$v$ in a round $r$ by setting $lockedValue = v$ and $lockedRound = r$ before
sending $\Precommit$ message for $\id(v)$. As a correct process can decide a
value $v$ only if $2f+1$ $\Precommit$ messages for $\id(v)$ are received, this
implies that a possible decision value is a value that is locked by at least
$f+1$ voting power equivalent of correct processes. Therefore, any value $v$
for which $\Proposal$ and $2f+1$ of the corresponding $\Prevote$ messages are
received in some round $r$ is a \emph{possible decision} value. The role of the
$validValue$ variable is to store the most recent possible decision value; the
$validRound$ is the last round in which $validValue$ is updated. Apart from
those variables, a process also stores the current consensus instance ($h_p$,
called \emph{height} in Tendermint), and the current round number ($round_p$)
and attaches them to every message. Finally, a process also stores an array of
decisions, $decision_p$ (Tendermint assumes a sequence of consensus instances,
one for each height).
Every round starts by a proposer suggesting a value with the $\Proposal$
message (see line \ref{line:tab:send-proposal}). In the initial round of each
height, the proposer is free to chose the value to suggest. In the
Algorithm~\ref{alg:tendermint}, a correct process obtains a value to propose
using an external function $getValue()$ that returns a valid value to
propose. In the following rounds, a correct proposer will suggest a new value
only if $validValue = \nil$; otherwise $validValue$ is proposed (see
lines~\ref{line:tab:isThereLockedValue}-\ref{line:tab:getValidValue}).
In addition to the value proposed, the $\Proposal$ message also
contains the $validRound$ so other processes are informed about the last round
in which the proposer observed $validValue$ as a possible decision value.
Note that if a correct proposer $p$ sends $validValue$ with the $validRound$ in the
$\Proposal$, this implies that the process $p$ received $\Proposal$ and the
corresponding $2f+1$ $\Prevote$ messages for $validValue$ in the round
$validRound$.
If a correct process sends $\Proposal$ message with $validValue$ ($validRound > -1$)
at time $t > GST$, by the \emph{Gossip communication} property, the
corresponding $\Proposal$ and the $\Prevote$ messages will be received by all
correct processes before time $t+\Delta$. Therefore, all correct processes will
be able to verify the correctness of the suggested value as it is supported by
the $\Proposal$ and the corresponding $2f+1$ voting power equivalent $\Prevote$
messages.
A correct process $p$ accepts the proposal for a value $v$ (send $\Prevote$
for $id(v)$) if an external \emph{valid} function returns $true$ for the value
$v$, and if $p$ hasn't locked any value ($lockedRound = -1$) or $p$ has locked
the value $v$ ($lockedValue = v$); see the line
\ref{line:tab:accept-proposal-2}. In case the proposed pair is $(v,vr \ge 0)$ and a
correct process $p$ has locked some value, it will accept
$v$ if it is a more recent possible decision value\footnote{As
explained above, the possible decision value in a round $r$ is the one for
which $\Proposal$ and the corresponding $2f+1$ $\Prevote$ messages are received
for the round $r$.}, $vr > lockedRound_p$, or if $lockedValue = v$
(see line~\ref{line:tab:cond-prevote-higher-proposal}). Otherwise, a correct
process will reject the proposal by sending $\Prevote$ message with $\nil$
value. A correct process will send $\Prevote$ message with $\nil$ value also in
case $\timeoutPropose$ expired (it is triggered when a correct process starts a
new round) and a process has not sent $\Prevote$ message in the current round
yet (see the line \ref{line:tab:onTimeoutPropose}).
If a correct process receives $\Proposal$ message for some value $v$ and $2f+1$
$\Prevote$ messages for $\id(v)$, then it sends $\Precommit$ message with
$\id(v)$. Otherwise, it sends $\Precommit$ $\nil$. A correct process will send
$\Precommit$ message with $\nil$ value also in case $\timeoutPrevote$ expired
(it is started when a correct process sent $\Prevote$ message and received any
$2f+1$ $\Prevote$ messages) and a process has not sent $\Precommit$ message in
the current round yet (see the line \ref{line:tab:onTimeoutPrecommit}). A
correct process decides on some value $v$ if it receives in some round $r$
$\Proposal$ message for $v$ and $2f+1$ $\Precommit$ messages with $\id(v)$ (see
the line \ref{line:tab:decide}). To prevent the algorithm from blocking and
waiting forever for this condition to be true, the Algorithm
\ref{alg:tendermint} relies on $\timeoutPrecommit$. It is triggered after a
process receives any set of $2f+1$ $\Precommit$ messages for the current round.
If the $\timeoutPrecommit$ expires and a process has not decided yet, the
process starts the next round (see the line \ref{line:tab:onTimeoutPrecommit}).
When a correct process $p$ decides, it starts the next consensus instance
(for the next height). The \emph{Gossip communication} property ensures
that $\Proposal$ and $2f+1$ $\Prevote$ messages that led $p$ to decide
are eventually received by all correct processes, so they will also decide.
\subsection{Termination mechanism}
Tendermint ensures termination by a novel mechanism that benefits from the
gossip based nature of communication (see \emph{Gossip communication}
property). It requires managing two additional variables, $validValue$ and
$validRound$ that are then used by the proposer during the propose step as
explained above. The $validValue$ and $validRound$ are updated to $v$ and $r$
by a correct process in a round $r$ when the process receives valid $\Proposal$
message for the value $v$ and the corresponding $2f+1$ $\Prevote$ messages for
$id(v)$ in the round $r$ (see the rule at line~\ref{line:tab:recvPrevote}).
We now give briefly the intuition how managing and proposing $validValue$
and $validRound$ ensures termination. Formal treatment is left for
Section~\ref{sec:proof}.
The first thing to note is that during good period, because of the
\emph{Gossip communication} property, if a correct process $p$ locks a value
$v$ in some round $r$, all correct processes will update $validValue$ to $v$
and $validRound$ to $r$ before the end of the round $r$ (we prove this formally
in the Section~\ref{sec:proof}). The intuition is that messages that led to $p$
locking a value $v$ in the round $r$ will be gossiped to all correct processes
before the end of the round $r$, so it will update $validValue$ and
$validRound$ (the line~\ref{line:tab:recvPrevote}). Therefore, if a correct
process locks some value during good period, $validValue$ and $validRound$ are
updated by all correct processes so that the value proposed in the following
rounds will be acceptable by all correct processes. Note
that it could happen that during good period, no correct process locks a value,
but some correct process $q$ updates $validValue$ and $validRound$ during some
round. As no correct process locks a value in this case, $validValue_q$ and
$validRound_q$ will also be acceptable by all correct processes as
$validRound_q > lockedRound_c$ for every correct process $c$ and as the
\emph{Gossip communication} property ensures that the corresponding $\Prevote$
messages that $q$ received in the round $validRound_q$ are received by all
correct processes $\Delta$ time later.
Finally, it could happen that after GST, there is a long sequence of rounds in which
no correct process neither locks a value nor update $validValue$ and $validRound$.
In this case, during this sequence of rounds, the proposed value suggested by correct
processes was not accepted by all correct processes. Note that this sequence of rounds
is always finite as at the beginning of every
round there is at least a single correct process $c$ such that $validValue_c$
and $validRound_c$ are acceptable by every correct process. This is true as
there exists a correct process $c$ such that for every other correct process
$p$, $validRound_c > lockedRound_p$ or $validValue_c = lockedValue_p$. This is
true as $c$ is the process that has locked a value in the most recent round
among all correct processes (or no correct process locked any value). Therefore,
eventually $c$ will be the proper in some round and the proposed value will be accepted
by all correct processes, terminating therefore this sequence of
rounds.
Therefore, updating $validValue$ and $validRound$ variables, and the
\emph{Gossip communication} property, together ensures that eventually, during
the good period, there exists a round with a correct proposer whose proposed
value will be accepted by all correct processes, and all correct processes will
terminate in that round. Note that this mechanism, contrary to the common
termination mechanism illustrated in the
Figure~\ref{ch3:fig:coordinator-change}, does not require exchanging any
additional information in addition to messages already sent as part of what is
normally being called "normal" case.