diff --git a/CHANGELOG.md b/CHANGELOG.md index d34338a91..c97b962af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ IMPROVEMENTS: - [genesis] removed deprecated `app_options` field. +- [types] Genesis.AppStateJSON -> Genesis.AppState + +## 0.22.3 + +IMPROVEMENTS +- Update dependencies + * pin all values in Gopkg.toml to version or commit + * update golang/protobuf to v1.1.0 ## 0.22.2 @@ -21,6 +29,7 @@ BUG FIXES - NOTE: this is only for URI requests. JSONRPC requests and all responses will use quoted integers (the proto3 JSON standard). - [consensus] Fix halt on shutdown +- [tm_bench] Fix method of computing start time, and end time ## 0.22.1 diff --git a/Gopkg.lock b/Gopkg.lock index b1beaa208..44192a011 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -11,10 +11,9 @@ branch = "master" name = "github.com/btcsuite/btcd" packages = ["btcec"] - revision = "86fed781132ac890ee03e906e4ecd5d6fa180c64" + revision = "fdfc19097e7ac6b57035062056f5b7b4638b8898" [[projects]] - branch = "master" name = "github.com/btcsuite/btcutil" packages = [ "base58", @@ -29,16 +28,15 @@ version = "v1.1.0" [[projects]] - branch = "master" name = "github.com/ebuchman/fail-test" packages = ["."] revision = "95f809107225be108efcf10a3509e4ea6ceef3c4" [[projects]] - branch = "master" name = "github.com/fortytw2/leaktest" packages = ["."] - revision = "b008db64ef8daabb22ff6daa557f33b41d8f6ccd" + revision = "a5ef70473c97b71626b9abeda80ee92ba2a7de9e" + version = "v1.2.0" [[projects]] name = "github.com/fsnotify/fsnotify" @@ -180,13 +178,12 @@ version = "v1.0.0" [[projects]] - branch = "master" name = "github.com/prometheus/client_golang" packages = [ "prometheus", "prometheus/promhttp" ] - revision = "d6a9817c4afc94d51115e4a30d449056a3fbf547" + revision = "ae27198cdd90bf12cd134ad79d1366a6cf49f632" [[projects]] branch = "master" @@ -213,10 +210,9 @@ "nfs", "xfs" ] - revision = "40f013a808ec4fa79def444a1a56de4d1727efcb" + revision = "ae68e2d4c00fed4943b5f6698d504a5fe083da8a" [[projects]] - 
branch = "master" name = "github.com/rcrowley/go-metrics" packages = ["."] revision = "e2704e165165ec55d062f5919b4b29494e9fa790" @@ -266,8 +262,8 @@ "assert", "require" ] - revision = "f35b8ab0b5a2cef36673838d662e249dd9c94686" - version = "v1.2.2" + revision = "12b6f73e6084dad08a7c6e575284b177ecafbc71" + version = "v1.2.1" [[projects]] branch = "master" @@ -286,7 +282,7 @@ "leveldb/table", "leveldb/util" ] - revision = "e2150783cd35f5b607daca48afd8c57ec54cc995" + revision = "c4c61651e9e37fa117f53c5a906d3b63090d8445" [[projects]] branch = "master" @@ -326,7 +322,6 @@ revision = "a49355c7e3f8fe157a85be2f77e6e269a0f89602" [[projects]] - branch = "master" name = "golang.org/x/net" packages = [ "context", @@ -338,7 +333,7 @@ "netutil", "trace" ] - revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9" + revision = "292b43bbf7cb8d35ddf40f8d5100ef3837cced3f" [[projects]] branch = "master" @@ -347,7 +342,7 @@ "cpu", "unix" ] - revision = "7138fd3d9dc8335c567ca206f4333fb75eb05d56" + revision = "1b2967e3c290b7c545b3db0deeda16e9be4f98a2" [[projects]] name = "golang.org/x/text" @@ -414,6 +409,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "6e854634d6c203278ce83bef7725cecbcf90023b0d0e440fb3374acedacbd5ad" + inputs-digest = "b0718135d5ade0a75c6b8fe703f70eb9d8064ba871ec31abd9ace3c4ab944100" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index ecce0e417..7bf82798e 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -23,16 +23,12 @@ # non-go = false # go-tests = true # unused-packages = true +# +########################################################### +# NOTE: All packages should be pinned to specific versions. +# Packages without releases must pin to a commit. 
-[[constraint]] - name = "github.com/ebuchman/fail-test" - branch = "master" - -[[constraint]] - name = "github.com/fortytw2/leaktest" - branch = "master" - [[constraint]] name = "github.com/go-kit/kit" version = "=0.6.0" @@ -47,16 +43,12 @@ [[constraint]] name = "github.com/gorilla/websocket" - version = "~1.2.0" + version = "=1.2.0" [[constraint]] name = "github.com/pkg/errors" version = "=0.8.0" -[[constraint]] - name = "github.com/rcrowley/go-metrics" - branch = "master" - [[constraint]] name = "github.com/spf13/cobra" version = "=0.0.1" @@ -67,29 +59,60 @@ [[constraint]] name = "github.com/stretchr/testify" - version = "~1.2.1" + version = "=1.2.1" [[constraint]] name = "github.com/tendermint/go-amino" - version = "~0.10.1" + version = "=0.10.1" [[constraint]] name = "google.golang.org/grpc" - version = "~1.11.3" + version = "=1.11.3" -# this got updated and broke, so locked to an old working commit ... +[[constraint]] + name = "github.com/fortytw2/leaktest" + version = "=1.2.0" + +################################### +## Some repos dont have releases. +## Pin to revision + +## We can remove this one by updating protobuf to v1.1.0 +## but then the grpc tests break with +#--- FAIL: TestBroadcastTx (0.01s) +#panic: message/group field common.KVPair:bytes without pointer [recovered] +# panic: message/group field common.KVPair:bytes without pointer +# +# ... +# +# github.com/tendermint/tendermint/rpc/grpc_test.TestBroadcastTx(0xc420a5ab40) +# /go/src/github.com/tendermint/tendermint/rpc/grpc/grpc_test.go:29 +0x141 [[override]] name = "google.golang.org/genproto" revision = "7fd901a49ba6a7f87732eb344f6e3c5b19d1b200" -[prune] - go-tests = true - unused-packages = true +[[constraint]] + name = "github.com/ebuchman/fail-test" + revision = "95f809107225be108efcf10a3509e4ea6ceef3c4" + +# last revision used by go-crypto +[[constraint]] + name = "github.com/btcsuite/btcutil" + revision = "d4cc87b860166d00d6b5b9e0d3b3d71d6088d4d4" +# Haven't made a release since 2016. 
[[constraint]] name = "github.com/prometheus/client_golang" - branch = "master" + revision = "ae27198cdd90bf12cd134ad79d1366a6cf49f632" + +[[constraint]] + name = "github.com/rcrowley/go-metrics" + revision = "e2704e165165ec55d062f5919b4b29494e9fa790" [[constraint]] - branch = "master" name = "golang.org/x/net" + revision = "292b43bbf7cb8d35ddf40f8d5100ef3837cced3f" + +[prune] + go-tests = true + unused-packages = true diff --git a/consensus/reactor.go b/consensus/reactor.go index 48ebcad23..3eb1d73aa 100644 --- a/consensus/reactor.go +++ b/consensus/reactor.go @@ -80,6 +80,9 @@ func (conR *ConsensusReactor) OnStop() { conR.BaseReactor.OnStop() conR.unsubscribeFromBroadcastEvents() conR.conS.Stop() + if !conR.FastSync() { + conR.conS.Wait() + } } // SwitchToConsensus switches from fast_sync mode to consensus mode. diff --git a/docs/specification/fast-sync.rst b/docs/networks/fast-sync.md similarity index 62% rename from docs/specification/fast-sync.rst rename to docs/networks/fast-sync.md index c98ec43a3..e92d82394 100644 --- a/docs/specification/fast-sync.rst +++ b/docs/networks/fast-sync.md @@ -1,8 +1,4 @@ -Fast Sync -========= - -Background ----------- +# Fast Sync In a proof of work blockchain, syncing with the chain is the same process as staying up-to-date with the consensus: download blocks, and @@ -14,21 +10,19 @@ scratch can take a very long time. It's much faster to just download blocks and check the merkle tree of validators than to run the real-time consensus gossip protocol. -Fast Sync ---------- +## Using Fast Sync -To support faster syncing, tendermint offers a ``fast-sync`` mode, which -is enabled by default, and can be toggled in the ``config.toml`` or via -``--fast_sync=false``. +To support faster syncing, tendermint offers a `fast-sync` mode, which +is enabled by default, and can be toggled in the `config.toml` or via +`--fast_sync=false`. 
In this mode, the tendermint daemon will sync hundreds of times faster than if it used the real-time consensus process. Once caught up, the daemon will switch out of fast sync and into the normal consensus mode. -After running for some time, the node is considered ``caught up`` if it +After running for some time, the node is considered `caught up` if it has at least one peer and it's height is at least as high as the max -reported peer height. See `the IsCaughtUp -method `__. +reported peer height. See [the IsCaughtUp +method](https://github.com/tendermint/tendermint/blob/b467515719e686e4678e6da4e102f32a491b85a0/blockchain/pool.go#L128). If we're lagging sufficiently, we should go back to fast syncing, but -this is an open issue: -https://github.com/tendermint/tendermint/issues/129 +this is an [open issue](https://github.com/tendermint/tendermint/issues/129). diff --git a/docs/spec/blockchain/encoding.md b/docs/spec/blockchain/encoding.md index 16902d099..49c88475b 100644 --- a/docs/spec/blockchain/encoding.md +++ b/docs/spec/blockchain/encoding.md @@ -149,7 +149,33 @@ func MakeParts(obj interface{}, partSize int) []Part ## Merkle Trees -Simple Merkle trees are used in numerous places in Tendermint to compute a cryptographic digest of a data structure. +For an overview of Merkle trees, see +[wikipedia](https://en.wikipedia.org/wiki/Merkle_tree) + + +A Simple Tree is a simple compact binary tree for a static list of items. Simple Merkle trees are used in numerous places in Tendermint to compute a cryptographic digest of a data structure. In a Simple Tree, the transactions and validation signatures of a block are hashed using this simple merkle tree logic. + +If the number of items is not a power of two, the tree will not be full +and some leaf nodes will be at different levels. 
Simple Tree tries to +keep both sides of the tree the same size, but the left side may be one +greater, for example: + +``` + Simple Tree with 6 items Simple Tree with 7 items + + * * + / \ / \ + / \ / \ + / \ / \ + / \ / \ + * * * * + / \ / \ / \ / \ + / \ / \ / \ / \ + / \ / \ / \ / \ + * h2 * h5 * * * h6 + / \ / \ / \ / \ / \ +h0 h1 h3 h4 h0 h1 h2 h3 h4 h5 +``` Tendermint always uses the `TMHASH` hash function, which is the first 20-bytes of the SHA256: @@ -235,6 +261,18 @@ func computeHashFromAunts(index, total int, leafHash []byte, innerHashes [][]byt } ``` +### Simple Tree with Dictionaries + +The Simple Tree is used to merkelize a list of items, so to merkelize a +(short) dictionary of key-value pairs, encode the dictionary as an +ordered list of ``KVPair`` structs. The block hash is such a hash +derived from all the fields of the block ``Header``. The state hash is +similarly derived. + +### IAVL+ Tree + +Because Tendermint only uses a Simple Merkle Tree, application developers are expected to use their own Merkle tree in their applications. For example, the IAVL+ Tree - an immutable self-balancing binary tree for persisting application state - is used by the [Cosmos SDK](https://github.com/cosmos/cosmos-sdk/blob/develop/docs/core/multistore.md). + ## JSON ### Amino diff --git a/docs/spec/consensus/consensus.md b/docs/spec/consensus/consensus.md index 1bf075773..d6804779c 100644 --- a/docs/spec/consensus/consensus.md +++ b/docs/spec/consensus/consensus.md @@ -1,9 +1,329 @@ -We are working to finalize an updated Tendermint specification with formal -proofs of safety and liveness. +# Byzantine Consensus Algorithm -In the meantime, see the [description in the -docs](http://tendermint.readthedocs.io/en/master/specification/byzantine-consensus-algorithm.html). 
+## Terms -There are also relevant but somewhat outdated descriptions in Jae Kwon's [original -whitepaper](https://tendermint.com/static/docs/tendermint.pdf) and Ethan Buchman's [master's -thesis](https://atrium.lib.uoguelph.ca/xmlui/handle/10214/9769). +- The network is composed of optionally connected *nodes*. Nodes + directly connected to a particular node are called *peers*. +- The consensus process in deciding the next block (at some *height* + `H`) is composed of one or many *rounds*. +- `NewHeight`, `Propose`, `Prevote`, `Precommit`, and `Commit` + represent state machine states of a round. (aka `RoundStep` or + just "step"). +- A node is said to be *at* a given height, round, and step, or at + `(H,R,S)`, or at `(H,R)` in short to omit the step. +- To *prevote* or *precommit* something means to broadcast a [prevote + vote](https://godoc.org/github.com/tendermint/tendermint/types#Vote) + or [first precommit + vote](https://godoc.org/github.com/tendermint/tendermint/types#FirstPrecommit) + for something. +- A vote *at* `(H,R)` is a vote signed with the bytes for `H` and `R` + included in its [sign-bytes](block-structure.html#vote-sign-bytes). +- *+2/3* is short for "more than 2/3" +- *1/3+* is short for "1/3 or more" +- A set of +2/3 of prevotes for a particular block or `<nil>` at + `(H,R)` is called a *proof-of-lock-change* or *PoLC* for short. + +## State Machine Overview + +At each height of the blockchain a round-based protocol is run to +determine the next block. Each round is composed of three *steps* +(`Propose`, `Prevote`, and `Precommit`), along with two special steps +`Commit` and `NewHeight`. + +In the optimal scenario, the order of steps is: + +``` +NewHeight -> (Propose -> Prevote -> Precommit)+ -> Commit -> NewHeight ->... +``` + +The sequence `(Propose -> Prevote -> Precommit)` is called a *round*. +There may be more than one round required to commit a block at a given +height. 
Examples for why more rounds may be required include: + +- The designated proposer was not online. +- The block proposed by the designated proposer was not valid. +- The block proposed by the designated proposer did not propagate + in time. +- The block proposed was valid, but +2/3 of prevotes for the proposed + block were not received in time for enough validator nodes by the + time they reached the `Precommit` step. Even though +2/3 of prevotes + are necessary to progress to the next step, at least one validator + may have voted `<nil>` or maliciously voted for something else. +- The block proposed was valid, and +2/3 of prevotes were received for + enough nodes, but +2/3 of precommits for the proposed block were not + received for enough validator nodes. + +Some of these problems are resolved by moving onto the next round & +proposer. Others are resolved by increasing certain round timeout +parameters over each successive round. + +## State Machine Diagram + +``` + +-------------------------------------+ + v |(Wait til `CommitTime+timeoutCommit`) + +-----------+ +-----+-----+ + +----------> | Propose +--------------+ | NewHeight | + | +-----------+ | +-----------+ + | | ^ + |(Else, after timeoutPrecommit) v | ++-----+-----+ +-----------+ | +| Precommit | <------------------------+ Prevote | | ++-----+-----+ +-----------+ | + |(When +2/3 Precommits for block found) | + v | ++--------------------------------------------------------------------+ + | Commit | + | | + | * Set CommitTime = now; | + | * Wait for block, then stage/save/commit block; | + +--------------------------------------------------------------------+ +``` + +Background Gossip +================= + +A node may not have a corresponding validator private key, but it +nevertheless plays an active role in the consensus process by relaying +relevant meta-data, proposals, blocks, and votes to its peers. 
A node +that has the private keys of an active validator and is engaged in +signing votes is called a *validator-node*. All nodes (not just +validator-nodes) have an associated state (the current height, round, +and step) and work to make progress. + +Between two nodes there exists a `Connection`, and multiplexed on top of +this connection are fairly throttled `Channel`s of information. An +epidemic gossip protocol is implemented among some of these channels to +bring peers up to speed on the most recent state of consensus. For +example, + +- Nodes gossip `PartSet` parts of the current round's proposer's + proposed block. A LibSwift inspired algorithm is used to quickly + broadcast blocks across the gossip network. +- Nodes gossip prevote/precommit votes. A node `NODE_A` that is ahead + of `NODE_B` can send `NODE_B` prevotes or precommits for `NODE_B`'s + current (or future) round to enable it to progress forward. +- Nodes gossip prevotes for the proposed PoLC (proof-of-lock-change) + round if one is proposed. +- Nodes gossip to nodes lagging in blockchain height with block + [commits](https://godoc.org/github.com/tendermint/tendermint/types#Commit) + for older blocks. +- Nodes opportunistically gossip `HasVote` messages to hint peers what + votes it already has. +- Nodes broadcast their current state to all neighboring peers. (but + is not gossiped further) + +There's more, but let's not get ahead of ourselves here. + +## Proposals + +A proposal is signed and published by the designated proposer at each +round. The proposer is chosen by a deterministic and non-choking round +robin selection algorithm that selects proposers in proportion to their +voting power (see +[implementation](https://github.com/tendermint/tendermint/blob/develop/types/validator_set.go)). + +A proposal at `(H,R)` is composed of a block and an optional latest +`PoLC-Round < R` which is included iff the proposer knows of one. 
This +hints the network to allow nodes to unlock (when safe) to ensure the +liveness property. + +## State Machine Spec + +### Propose Step (height:H,round:R) + +Upon entering `Propose`: - The designated proposer proposes a block at +`(H,R)`. + +The `Propose` step ends: - After `timeoutProposeR` after entering +`Propose`. --> goto `Prevote(H,R)` - After receiving proposal block +and all prevotes at `PoLC-Round`. --> goto `Prevote(H,R)` - After +[common exit conditions](#common-exit-conditions) + +### Prevote Step (height:H,round:R) + +Upon entering `Prevote`, each validator broadcasts its prevote vote. + +- First, if the validator is locked on a block since `LastLockRound` + but now has a PoLC for something else at round `PoLC-Round` where + `LastLockRound < PoLC-Round < R`, then it unlocks. +- If the validator is still locked on a block, it prevotes that. +- Else, if the proposed block from `Propose(H,R)` is good, it + prevotes that. +- Else, if the proposal is invalid or wasn't received on time, it + prevotes `<nil>`. + +The `Prevote` step ends: - After +2/3 prevotes for a particular block or +`<nil>`. --> goto `Precommit(H,R)` - After `timeoutPrevote` after +receiving any +2/3 prevotes. --> goto `Precommit(H,R)` - After +[common exit conditions](#common-exit-conditions) + +### Precommit Step (height:H,round:R) + +Upon entering `Precommit`, each validator broadcasts its precommit vote. +- If the validator has a PoLC at `(H,R)` for a particular block `B`, it +(re)locks (or changes lock to) and precommits `B` and sets +`LastLockRound = R`. - Else, if the validator has a PoLC at `(H,R)` for +`<nil>`, it unlocks and precommits `<nil>`. - Else, it keeps the lock +unchanged and precommits `<nil>`. + +A precommit for `<nil>` means "I didn’t see a PoLC for this round, but I +did get +2/3 prevotes and waited a bit". + +The Precommit step ends: - After +2/3 precommits for `<nil>`. --> +goto `Propose(H,R+1)` - After `timeoutPrecommit` after receiving any ++2/3 precommits. 
--> goto `Propose(H,R+1)` - After [common exit +conditions](#common-exit-conditions) + +### Common exit conditions + +- After +2/3 precommits for a particular block. --> goto + `Commit(H)` +- After any +2/3 prevotes received at `(H,R+x)`. --> goto + `Prevote(H,R+x)` +- After any +2/3 precommits received at `(H,R+x)`. --> goto + `Precommit(H,R+x)` + +### Commit Step (height:H) + +- Set `CommitTime = now()` +- Wait until block is received. --> goto `NewHeight(H+1)` + +### NewHeight Step (height:H) + +- Move `Precommits` to `LastCommit` and increment height. +- Set `StartTime = CommitTime+timeoutCommit` +- Wait until `StartTime` to receive straggler commits. --> goto + `Propose(H,0)` + +## Proofs + +### Proof of Safety + +Assume that less than 1/3 of the voting power of validators is byzantine. +If a validator commits block `B` at round `R`, it's because it saw +2/3 +of precommits at round `R`. This implies that 1/3+ of honest nodes are +still locked at round `R' > R`. These locked validators will remain +locked until they see a PoLC at `R' > R`, but this won't happen because +1/3+ are locked and honest, so less than 2/3 are available to vote for +anything other than `B`. + +### Proof of Liveness + +If 1/3+ honest validators are locked on two different blocks from +different rounds, a proposer's `PoLC-Round` will eventually cause nodes +locked from the earlier round to unlock. Eventually, the designated +proposer will be one that is aware of a PoLC at the later round. Also, +`timeoutProposeR` increments with round `R`, while the size of a +proposal is capped, so eventually the network is able to "fully gossip" +the whole proposal (e.g. the block & PoLC). + +### Proof of Fork Accountability + +Define the JSet (justification-vote-set) at height `H` of a validator +`V1` to be all the votes signed by the validator at `H` along with +justification PoLC prevotes for each lock change. 
For example, if `V1` +signed the following precommits: `Precommit(B1 @ round 0)`, +`Precommit(<nil> @ round 1)`, `Precommit(B2 @ round 4)` (note that no +precommits were signed for rounds 2 and 3, and that's ok), +`Precommit(B1 @ round 0)` must be justified by a PoLC at round 0, and +`Precommit(B2 @ round 4)` must be justified by a PoLC at round 4; but +the precommit for `<nil>` at round 1 is not a lock-change by definition +so the JSet for `V1` need not include any prevotes at round 1, 2, or 3 +(unless `V1` happened to have prevoted for those rounds). + +Further, define the JSet at height `H` of a set of validators `VSet` to +be the union of the JSets for each validator in `VSet`. For a given +commit by honest validators at round `R` for block `B` we can construct +a JSet to justify the commit for `B` at `R`. We say that a JSet +*justifies* a commit at `(H,R)` if all the committers (validators in the +commit-set) are each justified in the JSet with no duplicitous vote +signatures (by the committers). + +- **Lemma**: When a fork is detected by the existence of two + conflicting [commits](./validators.html#commiting-a-block), the + union of the JSets for both commits (if they can be compiled) must + include double-signing by at least 1/3+ of the validator set. + **Proof**: The commit cannot be at the same round, because that + would immediately imply double-signing by 1/3+. Take the union of + the JSets of both commits. If there is no double-signing by at least + 1/3+ of the validator set in the union, then no honest validator + could have precommitted any different block after the first commit. + Yet, +2/3 did. Reductio ad absurdum. + +As a corollary, when there is a fork, an external process can determine +the blame by requiring each validator to justify all of its round votes. +Either we will find 1/3+ who cannot justify at least one of their votes, +and/or, we will find 1/3+ who had double-signed. 
+ +### Alternative algorithm + +Alternatively, we can take the JSet of a commit to be the "full commit". +That is, if light clients and validators do not consider a block to be +committed unless the JSet of the commit is also known, then we get the +desirable property that if there ever is a fork (e.g. there are two +conflicting "full commits"), then 1/3+ of the validators are immediately +punishable for double-signing. + +There are many ways to ensure that the gossip network efficiently shares +the JSet of a commit. One solution is to add a new message type that +tells peers that this node has (or does not have) a +2/3 majority for B +(or `<nil>`) at (H,R), and a bitarray of which votes contributed towards that +majority. Peers can react by responding with appropriate votes. + +We will implement such an algorithm for the next iteration of the +Tendermint consensus protocol. + +Other potential improvements include adding more data in votes such as +the last known PoLC round that caused a lock change, and the last voted +round/step (or, we may require that validators not skip any votes). This +may make JSet verification/gossip logic easier to implement. + +### Censorship Attacks + +Due to the definition of a block +[commit](../../tendermint-core/validator.md#commiting-a-block), any 1/3+ coalition of +validators can halt the blockchain by not broadcasting their votes. Such +a coalition can also censor particular transactions by rejecting blocks +that include these transactions, though this would result in a +significant proportion of block proposals being rejected, which would +slow down the rate of block commits of the blockchain, reducing its +utility and value. The malicious coalition might also broadcast votes in +a trickle so as to grind blockchain block commits to a near halt, or +engage in any combination of these attacks. 
+ +If a global active adversary were also involved, it can partition the +network in such a way that it may appear that the wrong subset of +validators were responsible for the slowdown. This is not just a +limitation of Tendermint, but rather a limitation of all consensus +protocols whose network is potentially controlled by an active +adversary. + +### Overcoming Forks and Censorship Attacks + +For these types of attacks, a subset of the validators through external +means should coordinate to sign a reorg-proposal that chooses a fork +(and any evidence thereof) and the initial subset of validators with +their signatures. Validators who sign such a reorg-proposal forgo their +collateral on all other forks. Clients should verify the signatures on +the reorg-proposal, verify any evidence, and make a judgement or prompt +the end-user for a decision. For example, a phone wallet app may prompt +the user with a security warning, while a refrigerator may accept any +reorg-proposal signed by +1/2 of the original validators. + +No non-synchronous Byzantine fault-tolerant algorithm can come to +consensus when 1/3+ of validators are dishonest, yet a fork assumes that +1/3+ of validators have already been dishonest by double-signing or +lock-changing without justification. So, signing the reorg-proposal is a +coordination problem that cannot be solved by any non-synchronous +protocol (i.e. automatically, and without making assumptions about the +reliability of the underlying network). It must be provided by means +external to the weakly-synchronous Tendermint consensus algorithm. For +now, we leave the problem of reorg-proposal coordination to human +coordination via internet media. Validators must take care to ensure +that there are no significant network partitions, to avoid situations +where two conflicting reorg-proposals are signed. 
+ +Assuming that the external coordination medium and protocol is robust, +it follows that forks are less of a concern than [censorship +attacks](#censorship-attacks). diff --git a/docs/specification/block-structure.rst b/docs/specification/block-structure.rst deleted file mode 100644 index 7d8f3464c..000000000 --- a/docs/specification/block-structure.rst +++ /dev/null @@ -1,218 +0,0 @@ -Block Structure -=============== - -The tendermint consensus engine records all agreements by a -supermajority of nodes into a blockchain, which is replicated among all -nodes. This blockchain is accessible via various rpc endpoints, mainly -``/block?height=`` to get the full block, as well as -``/blockchain?minHeight=_&maxHeight=_`` to get a list of headers. But -what exactly is stored in these blocks? - -Block -~~~~~ - -A -`Block `__ -contains: - -- a `Header <#header>`__ contains merkle hashes for various chain - states -- the - `Data `__ - is all transactions which are to be processed -- the `LastCommit <#commit>`__ > 2/3 signatures for the last block - -The signatures returned along with block ``H`` are those validating -block ``H-1``. This can be a little confusing, but we must also consider -that the ``Header`` also contains the ``LastCommitHash``. It would be -impossible for a Header to include the commits that sign it, as it would -cause an infinite loop here. But when we get block ``H``, we find -``Header.LastCommitHash``, which must match the hash of ``LastCommit``. - -Header -~~~~~~ - -The -`Header `__ -contains lots of information (follow link for up-to-date info). Notably, -it maintains the ``Height``, the ``LastBlockID`` (to make it a chain), -and hashes of the data, the app state, and the validator set. This is -important as the only item that is signed by the validators is the -``Header``, and all other data must be validated against one of the -merkle hashes in the ``Header``. 
- -The ``DataHash`` can provide a nice check on the -`Data `__ -returned in this same block. If you are subscribed to new blocks, via -tendermint RPC, in order to display or process the new transactions you -should at least validate that the ``DataHash`` is valid. If it is -important to verify autheniticity, you must wait for the ``LastCommit`` -from the next block to make sure the block header (including -``DataHash``) was properly signed. - -The ``ValidatorHash`` contains a hash of the current -`Validators `__. -Tracking all changes in the validator set is complex, but a client can -quickly compare this hash with the `hash of the currently known -validators `__ -to see if there have been changes. - -The ``AppHash`` serves as the basis for validating any merkle proofs -that come from the ABCI application. It represents the -state of the actual application, rather that the state of the blockchain -itself. This means it's necessary in order to perform any business -logic, such as verifying an account balance. - -**Note** After the transactions are committed to a block, they still -need to be processed in a separate step, which happens between the -blocks. If you find a given transaction in the block at height ``H``, -the effects of running that transaction will be first visible in the -``AppHash`` from the block header at height ``H+1``. - -Like the ``LastCommit`` issue, this is a requirement of the immutability -of the block chain, as the application only applies transactions *after* -they are commited to the chain. - -Commit -~~~~~~ - -The -`Commit `__ -contains a set of -`Votes `__ -that were made by the validator set to reach consensus on this block. -This is the key to the security in any PoS system, and actually no data -that cannot be traced back to a block header with a valid set of Votes -can be trusted. Thus, getting the Commit data and verifying the votes is -extremely important. 
- -As mentioned above, in order to find the ``precommit votes`` for block -header ``H``, we need to query block ``H+1``. Then we need to check the -votes, make sure they really are for that block, and properly formatted. -Much of this code is implemented in Go in the -`light-client `__ package. -If you look at the code, you will notice that we need to provide the -``chainID`` of the blockchain in order to properly calculate the votes. -This is to protect anyone from swapping votes between chains to fake (or -frame) a validator. Also note that this ``chainID`` is in the -``genesis.json`` from *Tendermint*, not the ``genesis.json`` from the -basecoin app (`that is a different -chainID... `__). - -Once we have those votes, and we calculated the proper `sign -bytes `__ -using the chainID and a `nice helper -function `__, -we can verify them. The light client is responsible for maintaining a -set of validators that we trust. Each vote only stores the validators -``Address``, as well as the ``Signature``. Assuming we have a local copy -of the trusted validator set, we can look up the ``Public Key`` of the -validator given its ``Address``, then verify that the ``Signature`` -matches the ``SignBytes`` and ``Public Key``. Then we sum up the total -voting power of all validators, whose votes fulfilled all these -stringent requirements. If the total number of voting power for a single -block is greater than 2/3 of all voting power, then we can finally trust -the block header, the AppHash, and the proof we got from the ABCI -application. - -Vote Sign Bytes -^^^^^^^^^^^^^^^ - -The ``sign-bytes`` of a vote is produced by taking a -`stable-json `__-like -deterministic JSON `wire <./wire-protocol.html>`__ encoding of -the vote (excluding the ``Signature`` field), and wrapping it with -``{"chain_id":"my_chain","vote":...}``. - -For example, a precommit vote might have the following ``sign-bytes``: - -.. 
code:: json - - {"chain_id":"my_chain","vote":{"block_hash":"611801F57B4CE378DF1A3FFF1216656E89209A99","block_parts_header":{"hash":"B46697379DBE0774CC2C3B656083F07CA7E0F9CE","total":123},"height":1234,"round":1,"type":2}} - -Block Hash -~~~~~~~~~~ - -The `block -hash `__ -is the `Simple Tree hash <./merkle.html#simple-tree-with-dictionaries>`__ -of the fields of the block ``Header`` encoded as a list of -``KVPair``\ s. - -Transaction -~~~~~~~~~~~ - -A transaction is any sequence of bytes. It is up to your -ABCI application to accept or reject transactions. - -BlockID -~~~~~~~ - -Many of these data structures refer to the -`BlockID `__, -which is the ``BlockHash`` (hash of the block header, also referred to -by the next block) along with the ``PartSetHeader``. The -``PartSetHeader`` is explained below and is used internally to -orchestrate the p2p propogation. For clients, it is basically opaque -bytes, but they must match for all votes. - -PartSetHeader -~~~~~~~~~~~~~ - -The -`PartSetHeader `__ -contains the total number of pieces in a -`PartSet `__, -and the Merkle root hash of those pieces. - -PartSet -~~~~~~~ - -PartSet is used to split a byteslice of data into parts (pieces) for -transmission. By splitting data into smaller parts and computing a -Merkle root hash on the list, you can verify that a part is legitimately -part of the complete data, and the part can be forwarded to other peers -before all the parts are known. In short, it's a fast way to securely -propagate a large chunk of data (like a block) over a gossip network. - -PartSet was inspired by the LibSwift project. - -Usage: - -.. 
code:: go - - data := RandBytes(2 << 20) // Something large - - partSet := NewPartSetFromData(data) - partSet.Total() // Total number of 4KB parts - partSet.Count() // Equal to the Total, since we already have all the parts - partSet.Hash() // The Merkle root hash - partSet.BitArray() // A BitArray of partSet.Total() 1's - - header := partSet.Header() // Send this to the peer - header.Total // Total number of parts - header.Hash // The merkle root hash - - // Now we'll reconstruct the data from the parts - partSet2 := NewPartSetFromHeader(header) - partSet2.Total() // Same total as partSet.Total() - partSet2.Count() // Zero, since this PartSet doesn't have any parts yet. - partSet2.Hash() // Same hash as in partSet.Hash() - partSet2.BitArray() // A BitArray of partSet.Total() 0's - - // In a gossip network the parts would arrive in arbitrary order, perhaps - // in response to explicit requests for parts, or optimistically in response - // to the receiving peer's partSet.BitArray(). - for !partSet2.IsComplete() { - part := receivePartFromGossipNetwork() - added, err := partSet2.AddPart(part) - if err != nil { - // A wrong part, - // the merkle trail does not hash to partSet2.Hash() - } else if !added { - // A duplicate part already received - } - } - - data2, _ := ioutil.ReadAll(partSet2.GetReader()) - bytes.Equal(data, data2) // true diff --git a/docs/specification/byzantine-consensus-algorithm.rst b/docs/specification/byzantine-consensus-algorithm.rst deleted file mode 100644 index 15eab32d7..000000000 --- a/docs/specification/byzantine-consensus-algorithm.rst +++ /dev/null @@ -1,349 +0,0 @@ -Byzantine Consensus Algorithm -============================= - -Terms ------ - -- The network is composed of optionally connected *nodes*. Nodes - directly connected to a particular node are called *peers*. -- The consensus process in deciding the next block (at some *height* - ``H``) is composed of one or many *rounds*. 
-- ``NewHeight``, ``Propose``, ``Prevote``, ``Precommit``, and - ``Commit`` represent state machine states of a round. (aka - ``RoundStep`` or just "step"). -- A node is said to be *at* a given height, round, and step, or at - ``(H,R,S)``, or at ``(H,R)`` in short to omit the step. -- To *prevote* or *precommit* something means to broadcast a `prevote - vote `__ - or `first precommit - vote `__ - for something. -- A vote *at* ``(H,R)`` is a vote signed with the bytes for ``H`` and - ``R`` included in its - `sign-bytes `__. -- *+2/3* is short for "more than 2/3" -- *1/3+* is short for "1/3 or more" -- A set of +2/3 of prevotes for a particular block or ```` at - ``(H,R)`` is called a *proof-of-lock-change* or *PoLC* for short. - -State Machine Overview ----------------------- - -At each height of the blockchain a round-based protocol is run to -determine the next block. Each round is composed of three *steps* -(``Propose``, ``Prevote``, and ``Precommit``), along with two special -steps ``Commit`` and ``NewHeight``. - -In the optimal scenario, the order of steps is: - -:: - - NewHeight -> (Propose -> Prevote -> Precommit)+ -> Commit -> NewHeight ->... - -The sequence ``(Propose -> Prevote -> Precommit)`` is called a *round*. -There may be more than one round required to commit a block at a given -height. Examples for why more rounds may be required include: - -- The designated proposer was not online. -- The block proposed by the designated proposer was not valid. -- The block proposed by the designated proposer did not propagate in - time. -- The block proposed was valid, but +2/3 of prevotes for the proposed - block were not received in time for enough validator nodes by the - time they reached the ``Precommit`` step. Even though +2/3 of - prevotes are necessary to progress to the next step, at least one - validator may have voted ```` or maliciously voted for something - else. 
-- The block proposed was valid, and +2/3 of prevotes were received for - enough nodes, but +2/3 of precommits for the proposed block were not - received for enough validator nodes. - -Some of these problems are resolved by moving onto the next round & -proposer. Others are resolved by increasing certain round timeout -parameters over each successive round. - -State Machine Diagram ---------------------- - -:: - - +-------------------------------------+ - v |(Wait til `CommmitTime+timeoutCommit`) - +-----------+ +-----+-----+ - +----------> | Propose +--------------+ | NewHeight | - | +-----------+ | +-----------+ - | | ^ - |(Else, after timeoutPrecommit) v | - +-----+-----+ +-----------+ | - | Precommit | <------------------------+ Prevote | | - +-----+-----+ +-----------+ | - |(When +2/3 Precommits for block found) | - v | - +--------------------------------------------------------------------+ - | Commit | - | | - | * Set CommitTime = now; | - | * Wait for block, then stage/save/commit block; | - +--------------------------------------------------------------------+ - -Background Gossip ------------------ - -A node may not have a corresponding validator private key, but it -nevertheless plays an active role in the consensus process by relaying -relevant meta-data, proposals, blocks, and votes to its peers. A node -that has the private keys of an active validator and is engaged in -signing votes is called a *validator-node*. All nodes (not just -validator-nodes) have an associated state (the current height, round, -and step) and work to make progress. - -Between two nodes there exists a ``Connection``, and multiplexed on top -of this connection are fairly throttled ``Channel``\ s of information. -An epidemic gossip protocol is implemented among some of these channels -to bring peers up to speed on the most recent state of consensus. For -example, - -- Nodes gossip ``PartSet`` parts of the current round's proposer's - proposed block. 
A LibSwift inspired algorithm is used to quickly - broadcast blocks across the gossip network. -- Nodes gossip prevote/precommit votes. A node NODE\_A that is ahead of - NODE\_B can send NODE\_B prevotes or precommits for NODE\_B's current - (or future) round to enable it to progress forward. -- Nodes gossip prevotes for the proposed PoLC (proof-of-lock-change) - round if one is proposed. -- Nodes gossip to nodes lagging in blockchain height with block - `commits `__ - for older blocks. -- Nodes opportunistically gossip ``HasVote`` messages to hint peers - what votes it already has. -- Nodes broadcast their current state to all neighboring peers. (but is - not gossiped further) - -There's more, but let's not get ahead of ourselves here. - -Proposals ---------- - -A proposal is signed and published by the designated proposer at each -round. The proposer is chosen by a deterministic and non-choking round -robin selection algorithm that selects proposers in proportion to their -voting power. (see -`implementation `__) - -A proposal at ``(H,R)`` is composed of a block and an optional latest -``PoLC-Round < R`` which is included iff the proposer knows of one. This -hints the network to allow nodes to unlock (when safe) to ensure the -liveness property. - -State Machine Spec ------------------- - -Propose Step (height:H,round:R) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Upon entering ``Propose``: - The designated proposer proposes a block at -``(H,R)``. - -The ``Propose`` step ends: - After ``timeoutProposeR`` after entering -``Propose``. --> goto ``Prevote(H,R)`` - After receiving proposal block -and all prevotes at ``PoLC-Round``. --> goto ``Prevote(H,R)`` - After -`common exit conditions <#common-exit-conditions>`__ - -Prevote Step (height:H,round:R) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Upon entering ``Prevote``, each validator broadcasts its prevote vote. 
- -- First, if the validator is locked on a block since ``LastLockRound`` - but now has a PoLC for something else at round ``PoLC-Round`` where - ``LastLockRound < PoLC-Round < R``, then it unlocks. -- If the validator is still locked on a block, it prevotes that. -- Else, if the proposed block from ``Propose(H,R)`` is good, it - prevotes that. -- Else, if the proposal is invalid or wasn't received on time, it - prevotes ````. - -The ``Prevote`` step ends: - After +2/3 prevotes for a particular block -or ````. --> goto ``Precommit(H,R)`` - After ``timeoutPrevote`` -after receiving any +2/3 prevotes. --> goto ``Precommit(H,R)`` - After -`common exit conditions <#common-exit-conditions>`__ - -Precommit Step (height:H,round:R) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Upon entering ``Precommit``, each validator broadcasts its precommit -vote. - If the validator has a PoLC at ``(H,R)`` for a particular block -``B``, it (re)locks (or changes lock to) and precommits ``B`` and sets -``LastLockRound = R``. - Else, if the validator has a PoLC at ``(H,R)`` -for ````, it unlocks and precommits ````. - Else, it keeps the -lock unchanged and precommits ````. - -A precommit for ```` means "I didn’t see a PoLC for this round, but -I did get +2/3 prevotes and waited a bit". - -The Precommit step ends: - After +2/3 precommits for ````. --> goto -``Propose(H,R+1)`` - After ``timeoutPrecommit`` after receiving any +2/3 -precommits. --> goto ``Propose(H,R+1)`` - After `common exit -conditions <#common-exit-conditions>`__ - -common exit conditions -^^^^^^^^^^^^^^^^^^^^^^ - -- After +2/3 precommits for a particular block. --> goto ``Commit(H)`` -- After any +2/3 prevotes received at ``(H,R+x)``. --> goto - ``Prevote(H,R+x)`` -- After any +2/3 precommits received at ``(H,R+x)``. --> goto - ``Precommit(H,R+x)`` - -Commit Step (height:H) -~~~~~~~~~~~~~~~~~~~~~~ - -- Set ``CommitTime = now()`` -- Wait until block is received. 
--> goto ``NewHeight(H+1)`` - -NewHeight Step (height:H) -~~~~~~~~~~~~~~~~~~~~~~~~~ - -- Move ``Precommits`` to ``LastCommit`` and increment height. -- Set ``StartTime = CommitTime+timeoutCommit`` -- Wait until ``StartTime`` to receive straggler commits. --> goto - ``Propose(H,0)`` - -Proofs ------- - -Proof of Safety -~~~~~~~~~~~~~~~ - -Assume that at most -1/3 of the voting power of validators is byzantine. -If a validator commits block ``B`` at round ``R``, it's because it saw -+2/3 of precommits at round ``R``. This implies that 1/3+ of honest -nodes are still locked at round ``R' > R``. These locked validators will -remain locked until they see a PoLC at ``R' > R``, but this won't happen -because 1/3+ are locked and honest, so at most -2/3 are available to -vote for anything other than ``B``. - -Proof of Liveness -~~~~~~~~~~~~~~~~~ - -If 1/3+ honest validators are locked on two different blocks from -different rounds, a proposers' ``PoLC-Round`` will eventually cause -nodes locked from the earlier round to unlock. Eventually, the -designated proposer will be one that is aware of a PoLC at the later -round. Also, ``timeoutProposalR`` increments with round ``R``, while the -size of a proposal are capped, so eventually the network is able to -"fully gossip" the whole proposal (e.g. the block & PoLC). - -Proof of Fork Accountability -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Define the JSet (justification-vote-set) at height ``H`` of a validator -``V1`` to be all the votes signed by the validator at ``H`` along with -justification PoLC prevotes for each lock change. 
For example, if ``V1`` -signed the following precommits: ``Precommit(B1 @ round 0)``, -``Precommit( @ round 1)``, ``Precommit(B2 @ round 4)`` (note that -no precommits were signed for rounds 2 and 3, and that's ok), -``Precommit(B1 @ round 0)`` must be justified by a PoLC at round 0, and -``Precommit(B2 @ round 4)`` must be justified by a PoLC at round 4; but -the precommit for ```` at round 1 is not a lock-change by -definition so the JSet for ``V1`` need not include any prevotes at round -1, 2, or 3 (unless ``V1`` happened to have prevoted for those rounds). - -Further, define the JSet at height ``H`` of a set of validators ``VSet`` -to be the union of the JSets for each validator in ``VSet``. For a given -commit by honest validators at round ``R`` for block ``B`` we can -construct a JSet to justify the commit for ``B`` at ``R``. We say that a -JSet *justifies* a commit at ``(H,R)`` if all the committers (validators -in the commit-set) are each justified in the JSet with no duplicitous -vote signatures (by the committers). - -- **Lemma**: When a fork is detected by the existence of two - conflicting `commits <./validators.html#commiting-a-block>`__, - the union of the JSets for both commits (if they can be compiled) - must include double-signing by at least 1/3+ of the validator set. - **Proof**: The commit cannot be at the same round, because that would - immediately imply double-signing by 1/3+. Take the union of the JSets - of both commits. If there is no double-signing by at least 1/3+ of - the validator set in the union, then no honest validator could have - precommitted any different block after the first commit. Yet, +2/3 - did. Reductio ad absurdum. - -As a corollary, when there is a fork, an external process can determine -the blame by requiring each validator to justify all of its round votes. -Either we will find 1/3+ who cannot justify at least one of their votes, -and/or, we will find 1/3+ who had double-signed. 
- -Alternative algorithm -~~~~~~~~~~~~~~~~~~~~~ - -Alternatively, we can take the JSet of a commit to be the "full commit". -That is, if light clients and validators do not consider a block to be -committed unless the JSet of the commit is also known, then we get the -desirable property that if there ever is a fork (e.g. there are two -conflicting "full commits"), then 1/3+ of the validators are immediately -punishable for double-signing. - -There are many ways to ensure that the gossip network efficiently share -the JSet of a commit. One solution is to add a new message type that -tells peers that this node has (or does not have) a +2/3 majority for B -(or ) at (H,R), and a bitarray of which votes contributed towards that -majority. Peers can react by responding with appropriate votes. - -We will implement such an algorithm for the next iteration of the -Tendermint consensus protocol. - -Other potential improvements include adding more data in votes such as -the last known PoLC round that caused a lock change, and the last voted -round/step (or, we may require that validators not skip any votes). This -may make JSet verification/gossip logic easier to implement. - -Censorship Attacks -~~~~~~~~~~~~~~~~~~ - -Due to the definition of a block -`commit `__, any 1/3+ -coalition of validators can halt the blockchain by not broadcasting -their votes. Such a coalition can also censor particular transactions by -rejecting blocks that include these transactions, though this would -result in a significant proportion of block proposals to be rejected, -which would slow down the rate of block commits of the blockchain, -reducing its utility and value. The malicious coalition might also -broadcast votes in a trickle so as to grind blockchain block commits to -a near halt, or engage in any combination of these attacks. 
- -If a global active adversary were also involved, it can partition the -network in such a way that it may appear that the wrong subset of -validators were responsible for the slowdown. This is not just a -limitation of Tendermint, but rather a limitation of all consensus -protocols whose network is potentially controlled by an active -adversary. - -Overcoming Forks and Censorship Attacks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For these types of attacks, a subset of the validators through external -means should coordinate to sign a reorg-proposal that chooses a fork -(and any evidence thereof) and the initial subset of validators with -their signatures. Validators who sign such a reorg-proposal forego its -collateral on all other forks. Clients should verify the signatures on -the reorg-proposal, verify any evidence, and make a judgement or prompt -the end-user for a decision. For example, a phone wallet app may prompt -the user with a security warning, while a refrigerator may accept any -reorg-proposal signed by +1/2 of the original validators. - -No non-synchronous Byzantine fault-tolerant algorithm can come to -consensus when 1/3+ of validators are dishonest, yet a fork assumes that -1/3+ of validators have already been dishonest by double-signing or -lock-changing without justification. So, signing the reorg-proposal is a -coordination problem that cannot be solved by any non-synchronous -protocol (i.e. automatically, and without making assumptions about the -reliability of the underlying network). It must be provided by means -external to the weakly-synchronous Tendermint consensus algorithm. For -now, we leave the problem of reorg-proposal coordination to human -coordination via internet media. Validators must take care to ensure -that there are no significant network partitions, to avoid situations -where two conflicting reorg-proposals are signed. 
- -Assuming that the external coordination medium and protocol is robust, -it follows that forks are less of a concern than `censorship -attacks <#censorship-attacks>`__. diff --git a/docs/specification/corruption.rst b/docs/specification/corruption.rst deleted file mode 100644 index 6ae19fb18..000000000 --- a/docs/specification/corruption.rst +++ /dev/null @@ -1,70 +0,0 @@ -Corruption -========== - -Important step --------------- - -Make sure you have a backup of the Tendermint data directory. - -Possible causes ---------------- - -Remember that most corruption is caused by hardware issues: - -- RAID controllers with faulty / worn out battery backup, and an unexpected power loss -- Hard disk drives with write-back cache enabled, and an unexpected power loss -- Cheap SSDs with insufficient power-loss protection, and an unexpected power-loss -- Defective RAM -- Defective or overheating CPU(s) - -Other causes can be: - -- Database systems configured with fsync=off and an OS crash or power loss -- Filesystems configured to use write barriers plus a storage layer that ignores write barriers. LVM is a particular culprit. -- Tendermint bugs -- Operating system bugs -- Admin error - - directly modifying Tendermint data-directory contents - -(Source: https://wiki.postgresql.org/wiki/Corruption) - -WAL Corruption --------------- - -If consensus WAL is corrupted at the lastest height and you are trying to start -Tendermint, replay will fail with panic. - -Recovering from data corruption can be hard and time-consuming. Here are two approaches you can take: - -1) Delete the WAL file and restart Tendermint. It will attempt to sync with other peers. -2) Try to repair the WAL file manually: - - 1. Create a backup of the corrupted WAL file: - - .. code:: bash - - cp "$TMHOME/data/cs.wal/wal" > /tmp/corrupted_wal_backup - - 2. Use ./scripts/wal2json to create a human-readable version - - .. 
code:: bash - - ./scripts/wal2json/wal2json "$TMHOME/data/cs.wal/wal" > /tmp/corrupted_wal - - 3. Search for a "CORRUPTED MESSAGE" line. - 4. By looking at the previous message and the message after the corrupted one - and looking at the logs, try to rebuild the message. If the consequent - messages are marked as corrupted too (this may happen if length header - got corrupted or some writes did not make it to the WAL ~ truncation), - then remove all the lines starting from the corrupted one and restart - Tendermint. - - .. code:: bash - - $EDITOR /tmp/corrupted_wal - - 5. After editing, convert this file back into binary form by running: - - .. code:: bash - - ./scripts/json2wal/json2wal /tmp/corrupted_wal > "$TMHOME/data/cs.wal/wal" diff --git a/docs/specification/genesis.rst b/docs/specification/genesis.rst deleted file mode 100644 index 427c88bb2..000000000 --- a/docs/specification/genesis.rst +++ /dev/null @@ -1,71 +0,0 @@ -Genesis -======= - -The genesis.json file in ``$TMHOME/config`` defines the initial TendermintCore -state upon genesis of the blockchain (`see -definition `__). - -Fields -~~~~~~ - -- ``genesis_time``: Official time of blockchain start. -- ``chain_id``: ID of the blockchain. This must be unique for every - blockchain. If your testnet blockchains do not have unique chain IDs, - you will have a bad time. -- ``validators``: -- ``pub_key``: The first element specifies the pub\_key type. 1 == - Ed25519. The second element are the pubkey bytes. -- ``power``: The validator's voting power. -- ``name``: Name of the validator (optional). -- ``app_hash``: The expected application hash (as returned by the - ``ResponseInfo`` ABCI message) upon genesis. If the app's hash does not - match, Tendermint will panic. -- ``app_state``: The application state (e.g. initial distribution of tokens). - -Sample genesis.json -~~~~~~~~~~~~~~~~~~~ - -.. 
code:: json - - { - "genesis_time": "2016-02-05T06:02:31.526Z", - "chain_id": "chain-tTH4mi", - "validators": [ - { - "pub_key": [ - 1, - "9BC5112CB9614D91CE423FA8744885126CD9D08D9FC9D1F42E552D662BAA411E" - ], - "power": 1, - "name": "mach1" - }, - { - "pub_key": [ - 1, - "F46A5543D51F31660D9F59653B4F96061A740FF7433E0DC1ECBC30BE8494DE06" - ], - "power": 1, - "name": "mach2" - }, - { - "pub_key": [ - 1, - "0E7B423C1635FD07C0FC3603B736D5D27953C1C6CA865BB9392CD79DE1A682BB" - ], - "power": 1, - "name": "mach3" - }, - { - "pub_key": [ - 1, - "4F49237B9A32EB50682EDD83C48CE9CDB1D02A7CFDADCFF6EC8C1FAADB358879" - ], - "power": 1, - "name": "mach4" - } - ], - "app_hash": "15005165891224E721CB664D15CB972240F5703F", - "app_state": { - {"account": "Bob", "coins": 5000} - } - } diff --git a/docs/specification/light-client-protocol.rst b/docs/specification/light-client-protocol.rst deleted file mode 100644 index 6c6083b45..000000000 --- a/docs/specification/light-client-protocol.rst +++ /dev/null @@ -1,33 +0,0 @@ -Light Client Protocol -===================== - -Light clients are an important part of the complete blockchain system -for most applications. Tendermint provides unique speed and security -properties for light client applications. - -See our `lite package -`__. - -Overview --------- - -The objective of the light client protocol is to get a -`commit <./validators.html#committing-a-block>`__ for a recent -`block hash <./block-structure.html#block-hash>`__ where the commit -includes a majority of signatures from the last known validator set. -From there, all the application state is verifiable with `merkle -proofs <./merkle.html#iavl-tree>`__. - -Properties ----------- - -- You get the full collateralized security benefits of Tendermint; No - need to wait for confirmations. -- You get the full speed benefits of Tendermint; transactions commit - instantly. 
-- You can get the most recent version of the application state - non-interactively (without committing anything to the blockchain). - For example, this means that you can get the most recent value of a - name from the name-registry without worrying about fork censorship - attacks, without posting a commit and waiting for confirmations. It's - fast, secure, and free! diff --git a/docs/specification/merkle.rst b/docs/specification/merkle.rst deleted file mode 100644 index 588f24a98..000000000 --- a/docs/specification/merkle.rst +++ /dev/null @@ -1,88 +0,0 @@ -Merkle -====== - -For an overview of Merkle trees, see -`wikipedia `__. - -There are two types of Merkle trees used in Tendermint. - -- **IAVL+ Tree**: An immutable self-balancing binary - tree for persistent application state -- **Simple Tree**: A simple compact binary tree for - a static list of items - -IAVL+ Tree ----------- - -The purpose of this data structure is to provide persistent storage for -key-value pairs (e.g. account state, name-registrar data, and -per-contract data) such that a deterministic merkle root hash can be -computed. The tree is balanced using a variant of the `AVL -algorithm `__ so all operations -are O(log(n)). - -Nodes of this tree are immutable and indexed by its hash. Thus any node -serves as an immutable snapshot which lets us stage uncommitted -transactions from the mempool cheaply, and we can instantly roll back to -the last committed state to process transactions of a newly committed -block (which may not be the same set of transactions as those from the -mempool). - -In an AVL tree, the heights of the two child subtrees of any node differ -by at most one. Whenever this condition is violated upon an update, the -tree is rebalanced by creating O(log(n)) new nodes that point to -unmodified nodes of the old tree. In the original AVL algorithm, inner -nodes can also hold key-value pairs. 
The AVL+ algorithm (note the plus) -modifies the AVL algorithm to keep all values on leaf nodes, while only -using branch-nodes to store keys. This simplifies the algorithm while -minimizing the size of merkle proofs - -In Ethereum, the analog is the `Patricia -trie `__. There are tradeoffs. -Keys do not need to be hashed prior to insertion in IAVL+ trees, so this -provides faster iteration in the key space which may benefit some -applications. The logic is simpler to implement, requiring only two -types of nodes -- inner nodes and leaf nodes. The IAVL+ tree is a binary -tree, so merkle proofs are much shorter than the base 16 Patricia trie. -On the other hand, while IAVL+ trees provide a deterministic merkle root -hash, it depends on the order of updates. In practice this shouldn't be -a problem, since you can efficiently encode the tree structure when -serializing the tree contents. - -Simple Tree ------------ - -For merkelizing smaller static lists, use the Simple Tree. The -transactions and validation signatures of a block are hashed using this -simple merkle tree logic. - -If the number of items is not a power of two, the tree will not be full -and some leaf nodes will be at different levels. Simple Tree tries to -keep both sides of the tree the same size, but the left side may be one -greater. - -:: - - Simple Tree with 6 items Simple Tree with 7 items - - * * - / \ / \ - / \ / \ - / \ / \ - / \ / \ - * * * * - / \ / \ / \ / \ - / \ / \ / \ / \ - / \ / \ / \ / \ - * h2 * h5 * * * h6 - / \ / \ / \ / \ / \ - h0 h1 h3 h4 h0 h1 h2 h3 h4 h5 - -Simple Tree with Dictionaries -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Simple Tree is used to merkelize a list of items, so to merkelize a -(short) dictionary of key-value pairs, encode the dictionary as an -ordered list of ``KVPair`` structs. The block hash is such a hash -derived from all the fields of the block ``Header``. The state hash is -similarly derived. 
diff --git a/docs/specification/new-spec/README.md b/docs/specification/new-spec/README.md deleted file mode 100644 index 907ddd945..000000000 --- a/docs/specification/new-spec/README.md +++ /dev/null @@ -1 +0,0 @@ -Spec moved to [docs/spec](https://github.com/tendermint/tendermint/tree/master/docs/spec). diff --git a/docs/specification/wire-protocol.rst b/docs/specification/wire-protocol.rst deleted file mode 100644 index c0bf3b0ef..000000000 --- a/docs/specification/wire-protocol.rst +++ /dev/null @@ -1,172 +0,0 @@ -Wire Protocol -============= - -The `Tendermint wire protocol `__ -encodes data in `c-style binary <#binary>`__ and `JSON <#json>`__ form. - -Supported types ---------------- - -- Primitive types -- ``uint8`` (aka ``byte``), ``uint16``, ``uint32``, ``uint64`` -- ``int8``, ``int16``, ``int32``, ``int64`` -- ``uint``, ``int``: variable length (un)signed integers -- ``string``, ``[]byte`` -- ``time`` -- Derived types -- structs -- var-length arrays of a particular type -- fixed-length arrays of a particular type -- interfaces: registered union types preceded by a ``type byte`` -- pointers - -Binary ------- - -**Fixed-length primitive types** are encoded with 1,2,3, or 4 big-endian -bytes. - ``uint8`` (aka ``byte``), ``uint16``, ``uint32``, ``uint64``: -takes 1,2,3, and 4 bytes respectively - ``int8``, ``int16``, ``int32``, -``int64``: takes 1,2,3, and 4 bytes respectively - ``time``: ``int64`` -representation of nanoseconds since epoch - -**Variable-length integers** are encoded with a single leading byte -representing the length of the following big-endian bytes. For signed -negative integers, the most significant bit of the leading byte is a 1. - -- ``uint``: 1-byte length prefixed variable-size (0 ~ 255 bytes) - unsigned integers -- ``int``: 1-byte length prefixed variable-size (0 ~ 127 bytes) signed - integers - -NOTE: While the number 0 (zero) is encoded with a single byte ``x00``, -the number 1 (one) takes two bytes to represent: ``x0101``. 
This isn't -the most efficient representation, but the rules are easier to remember. - -+---------------+----------------+----------------+ -| number | binary | binary ``int`` | -| | ``uint`` | | -+===============+================+================+ -| 0 | ``x00`` | ``x00`` | -+---------------+----------------+----------------+ -| 1 | ``x0101`` | ``x0101`` | -+---------------+----------------+----------------+ -| 2 | ``x0102`` | ``x0102`` | -+---------------+----------------+----------------+ -| 256 | ``x020100`` | ``x020100`` | -+---------------+----------------+----------------+ -| 2^(127\ *8)-1 | ``x800100...`` | overflow | -| \| | | | -| ``x7FFFFF...` | | | -| ` | | | -| \| | | | -| ``x7FFFFF...` | | | -| ` | | | -| \| \| | | | -| 2^(127*\ 8) | | | -+---------------+----------------+----------------+ -| 2^(255\*8)-1 | -| \| | -| ``xFFFFFF...` | -| ` | -| \| overflow | -| \| \| -1 \| | -| n/a \| | -| ``x8101`` \| | -| \| -2 \| n/a | -| \| ``x8102`` | -| \| \| -256 \| | -| n/a \| | -| ``x820100`` | -| \| | -+---------------+----------------+----------------+ - -**Structures** are encoded by encoding the field values in order of -declaration. - -.. code:: go - - type Foo struct { - MyString string - MyUint32 uint32 - } - var foo = Foo{"626172", math.MaxUint32} - - /* The binary representation of foo: - 0103626172FFFFFFFF - 0103: `int` encoded length of string, here 3 - 626172: 3 bytes of string "bar" - FFFFFFFF: 4 bytes of uint32 MaxUint32 - */ - -**Variable-length arrays** are encoded with a leading ``int`` denoting -the length of the array followed by the binary representation of the -items. **Fixed-length arrays** are similar but aren't preceded by the -leading ``int``. - -.. 
code:: go - - foos := []Foo{foo, foo} - - /* The binary representation of foos: - 01020103626172FFFFFFFF0103626172FFFFFFFF - 0102: `int` encoded length of array, here 2 - 0103626172FFFFFFFF: the first `foo` - 0103626172FFFFFFFF: the second `foo` - */ - - foos := [2]Foo{foo, foo} // fixed-length array - - /* The binary representation of foos: - 0103626172FFFFFFFF0103626172FFFFFFFF - 0103626172FFFFFFFF: the first `foo` - 0103626172FFFFFFFF: the second `foo` - */ - -**Interfaces** can represent one of any number of concrete types. The -concrete types of an interface must first be declared with their -corresponding ``type byte``. An interface is then encoded with the -leading ``type byte``, then the binary encoding of the underlying -concrete type. - -NOTE: The byte ``x00`` is reserved for the ``nil`` interface value and -``nil`` pointer values. - -.. code:: go - - type Animal interface{} - type Dog uint32 - type Cat string - - RegisterInterface( - struct{ Animal }{}, // Convenience for referencing the 'Animal' interface - ConcreteType{Dog(0), 0x01}, // Register the byte 0x01 to denote a Dog - ConcreteType{Cat(""), 0x02}, // Register the byte 0x02 to denote a Cat - ) - - var animal Animal = Dog(02) - - /* The binary representation of animal: - 010102 - 01: the type byte for a `Dog` - 0102: the bytes of Dog(02) - */ - -**Pointers** are encoded with a single leading byte ``x00`` for ``nil`` -pointers, otherwise encoded with a leading byte ``x01`` followed by the -binary encoding of the value pointed to. - -NOTE: It's easy to convert pointer types into interface types, since the -``type byte`` ``x00`` is always ``nil``. - -JSON ----- - -The JSON codec is compatible with the ```binary`` <#binary>`__ codec, -and is fairly intuitive if you're already familiar with golang's JSON -encoding. 
Some quirks are noted below: - -- variable-length and fixed-length bytes are encoded as uppercase - hexadecimal strings -- interface values are encoded as an array of two items: - ``[type_byte, concrete_value]`` -- times are encoded as rfc2822 strings diff --git a/docs/tendermint-core/block-structure.md b/docs/tendermint-core/block-structure.md new file mode 100644 index 000000000..803805529 --- /dev/null +++ b/docs/tendermint-core/block-structure.md @@ -0,0 +1,206 @@ +# Block Structure + +The tendermint consensus engine records all agreements by a +supermajority of nodes into a blockchain, which is replicated among all +nodes. This blockchain is accessible via various rpc endpoints, mainly +`/block?height=` to get the full block, as well as +`/blockchain?minHeight=_&maxHeight=_` to get a list of headers. But what +exactly is stored in these blocks? + +## Block + +A +[Block](https://godoc.org/github.com/tendermint/tendermint/types#Block) +contains: + +- a [Header](#header) contains merkle hashes for various chain states +- the + [Data](https://godoc.org/github.com/tendermint/tendermint/types#Data) + is all transactions which are to be processed +- the [LastCommit](#commit) > 2/3 signatures for the last block + +The signatures returned along with block `H` are those validating block +`H-1`. This can be a little confusing, but we must also consider that +the `Header` also contains the `LastCommitHash`. It would be impossible +for a Header to include the commits that sign it, as it would cause an +infinite loop here. But when we get block `H`, we find +`Header.LastCommitHash`, which must match the hash of `LastCommit`. + +## Header + +The +[Header](https://godoc.org/github.com/tendermint/tendermint/types#Header) +contains lots of information (follow link for up-to-date info). Notably, +it maintains the `Height`, the `LastBlockID` (to make it a chain), and +hashes of the data, the app state, and the validator set. 
This is +important as the only item that is signed by the validators is the +`Header`, and all other data must be validated against one of the merkle +hashes in the `Header`. + +The `DataHash` can provide a nice check on the +[Data](https://godoc.org/github.com/tendermint/tendermint/types#Data) +returned in this same block. If you are subscribed to new blocks, via +tendermint RPC, in order to display or process the new transactions you +should at least validate that the `DataHash` is valid. If it is +important to verify authenticity, you must wait for the `LastCommit` +from the next block to make sure the block header (including `DataHash`) +was properly signed. + +The `ValidatorHash` contains a hash of the current +[Validators](https://godoc.org/github.com/tendermint/tendermint/types#Validator). +Tracking all changes in the validator set is complex, but a client can +quickly compare this hash with the [hash of the currently known +validators](https://godoc.org/github.com/tendermint/tendermint/types#ValidatorSet.Hash) +to see if there have been changes. + +The `AppHash` serves as the basis for validating any merkle proofs that +come from the ABCI application. It represents the state of the actual +application, rather than the state of the blockchain itself. This means +it's necessary in order to perform any business logic, such as verifying +an account balance. + +**Note** After the transactions are committed to a block, they still +need to be processed in a separate step, which happens between the +blocks. If you find a given transaction in the block at height `H`, the +effects of running that transaction will be first visible in the +`AppHash` from the block header at height `H+1`. + +Like the `LastCommit` issue, this is a requirement of the immutability +of the block chain, as the application only applies transactions *after* +they are committed to the chain.
+ +## Commit + +The +[Commit](https://godoc.org/github.com/tendermint/tendermint/types#Commit) +contains a set of +[Votes](https://godoc.org/github.com/tendermint/tendermint/types#Vote) +that were made by the validator set to reach consensus on this block. +This is the key to the security in any PoS system, and actually no data +that cannot be traced back to a block header with a valid set of Votes +can be trusted. Thus, getting the Commit data and verifying the votes is +extremely important. + +As mentioned above, in order to find the `precommit votes` for block +header `H`, we need to query block `H+1`. Then we need to check the +votes, make sure they really are for that block, and properly formatted. +Much of this code is implemented in Go in the +[light-client](https://github.com/tendermint/light-client) package. If +you look at the code, you will notice that we need to provide the +`chainID` of the blockchain in order to properly calculate the votes. +This is to prevent anyone from swapping votes between chains to fake (or +frame) a validator. Also note that this `chainID` is in the +`genesis.json` from *Tendermint*, not the `genesis.json` from the +basecoin app ([that is a different +chainID...](https://github.com/cosmos/cosmos-sdk/issues/32)). + +Once we have those votes, and we calculated the proper [sign +bytes](https://godoc.org/github.com/tendermint/tendermint/types#Vote.WriteSignBytes) +using the chainID and a [nice helper +function](https://godoc.org/github.com/tendermint/tendermint/types#SignBytes), +we can verify them. The light client is responsible for maintaining a +set of validators that we trust. Each vote only stores the validator's +`Address`, as well as the `Signature`. Assuming we have a local copy of +the trusted validator set, we can look up the `Public Key` of the +validator given its `Address`, then verify that the `Signature` matches +the `SignBytes` and `Public Key`.
Then we sum up the total voting power +of all validators, whose votes fulfilled all these stringent +requirements. If the total amount of voting power for a single block is +greater than 2/3 of all voting power, then we can finally trust the +block header, the AppHash, and the proof we got from the ABCI +application. + +### Vote Sign Bytes + +The `sign-bytes` of a vote is produced by taking a +[stable-json](https://github.com/substack/json-stable-stringify)-like +deterministic JSON [wire](./wire-protocol.html) encoding of the vote +(excluding the `Signature` field), and wrapping it with +`{"chain_id":"my_chain","vote":...}`. + +For example, a precommit vote might have the following `sign-bytes`: + +``` +{"chain_id":"my_chain","vote":{"block_hash":"611801F57B4CE378DF1A3FFF1216656E89209A99","block_parts_header":{"hash":"B46697379DBE0774CC2C3B656083F07CA7E0F9CE","total":123},"height":1234,"round":1,"type":2}} +``` + +## Block Hash + +The [block +hash](https://godoc.org/github.com/tendermint/tendermint/types#Block.Hash) +is the [Simple Tree hash](./merkle.html#simple-tree-with-dictionaries) +of the fields of the block `Header` encoded as a list of `KVPair`s. + +## Transaction + +A transaction is any sequence of bytes. It is up to your ABCI +application to accept or reject transactions. + +## BlockID + +Many of these data structures refer to the +[BlockID](https://godoc.org/github.com/tendermint/tendermint/types#BlockID), +which is the `BlockHash` (hash of the block header, also referred to by +the next block) along with the `PartSetHeader`. The `PartSetHeader` is +explained below and is used internally to orchestrate the p2p +propagation. For clients, it is basically opaque bytes, but they must +match for all votes.
+ +## PartSetHeader + +The +[PartSetHeader](https://godoc.org/github.com/tendermint/tendermint/types#PartSetHeader) +contains the total number of pieces in a +[PartSet](https://godoc.org/github.com/tendermint/tendermint/types#PartSet), +and the Merkle root hash of those pieces. + +## PartSet + +PartSet is used to split a byteslice of data into parts (pieces) for +transmission. By splitting data into smaller parts and computing a +Merkle root hash on the list, you can verify that a part is legitimately +part of the complete data, and the part can be forwarded to other peers +before all the parts are known. In short, it's a fast way to securely +propagate a large chunk of data (like a block) over a gossip network. + +PartSet was inspired by the LibSwift project. + +Usage: + +``` +data := RandBytes(2 << 20) // Something large + +partSet := NewPartSetFromData(data) +partSet.Total() // Total number of 4KB parts +partSet.Count() // Equal to the Total, since we already have all the parts +partSet.Hash() // The Merkle root hash +partSet.BitArray() // A BitArray of partSet.Total() 1's + +header := partSet.Header() // Send this to the peer +header.Total // Total number of parts +header.Hash // The merkle root hash + +// Now we'll reconstruct the data from the parts +partSet2 := NewPartSetFromHeader(header) +partSet2.Total() // Same total as partSet.Total() +partSet2.Count() // Zero, since this PartSet doesn't have any parts yet. +partSet2.Hash() // Same hash as in partSet.Hash() +partSet2.BitArray() // A BitArray of partSet.Total() 0's + +// In a gossip network the parts would arrive in arbitrary order, perhaps +// in response to explicit requests for parts, or optimistically in response +// to the receiving peer's partSet.BitArray(). 
+for !partSet2.IsComplete() { + part := receivePartFromGossipNetwork() + added, err := partSet2.AddPart(part) + if err != nil { + // A wrong part, + // the merkle trail does not hash to partSet2.Hash() + } else if !added { + // A duplicate part already received + } +} + +data2, _ := ioutil.ReadAll(partSet2.GetReader()) +bytes.Equal(data, data2) // true +``` diff --git a/docs/tendermint-core/light-client-protocol.md b/docs/tendermint-core/light-client-protocol.md new file mode 100644 index 000000000..6d905be32 --- /dev/null +++ b/docs/tendermint-core/light-client-protocol.md @@ -0,0 +1,30 @@ +# Light Client Protocol + +Light clients are an important part of the complete blockchain system +for most applications. Tendermint provides unique speed and security +properties for light client applications. + +See our [lite +package](https://godoc.org/github.com/tendermint/tendermint/lite). + +## Overview + +The objective of the light client protocol is to get a +[commit](./validators.md#committing-a-block) for a recent [block +hash](../spec/consensus/consensus.md#block-hash) where the commit includes a +majority of signatures from the last known validator set. From there, +all the application state is verifiable with [merkle +proofs](./merkle.md#iavl-tree). + +## Properties + +- You get the full collateralized security benefits of Tendermint; No + need to wait for confirmations. +- You get the full speed benefits of Tendermint; transactions + commit instantly. +- You can get the most recent version of the application state + non-interactively (without committing anything to the blockchain). + For example, this means that you can get the most recent value of a + name from the name-registry without worrying about fork censorship + attacks, without posting a commit and waiting for confirmations. + It's fast, secure, and free!
diff --git a/docs/tendermint-core/running-in-production.md b/docs/tendermint-core/running-in-production.md index 181d09428..094734320 100644 --- a/docs/tendermint-core/running-in-production.md +++ b/docs/tendermint-core/running-in-production.md @@ -104,6 +104,69 @@ signals we use the default behaviour in Go: [Default behavior of signals in Go programs](https://golang.org/pkg/os/signal/#hdr-Default_behavior_of_signals_in_Go_programs). +## Corruption + +**NOTE:** Make sure you have a backup of the Tendermint data directory. + +### Possible causes + +Remember that most corruption is caused by hardware issues: + +- RAID controllers with faulty / worn out battery backup, and an unexpected power loss +- Hard disk drives with write-back cache enabled, and an unexpected power loss +- Cheap SSDs with insufficient power-loss protection, and an unexpected power-loss +- Defective RAM +- Defective or overheating CPU(s) + +Other causes can be: + +- Database systems configured with fsync=off and an OS crash or power loss +- Filesystems configured to use write barriers plus a storage layer that ignores write barriers. LVM is a particular culprit. +- Tendermint bugs +- Operating system bugs +- Admin error (e.g., directly modifying Tendermint data-directory contents) + +(Source: https://wiki.postgresql.org/wiki/Corruption) + +### WAL Corruption + +If consensus WAL is corrupted at the latest height and you are trying to start +Tendermint, replay will fail with panic. + +Recovering from data corruption can be hard and time-consuming. Here are two approaches you can take: + +1) Delete the WAL file and restart Tendermint. It will attempt to sync with other peers. +2) Try to repair the WAL file manually: + + 1. Create a backup of the corrupted WAL file: + +``` +cp "$TMHOME/data/cs.wal/wal" /tmp/corrupted_wal_backup +``` + + 2. Use `./scripts/wal2json` to create a human-readable version + +``` +./scripts/wal2json/wal2json "$TMHOME/data/cs.wal/wal" > /tmp/corrupted_wal +``` + + 3.
Search for a "CORRUPTED MESSAGE" line. + 4. By looking at the previous message and the message after the corrupted one + and looking at the logs, try to rebuild the message. If the consequent + messages are marked as corrupted too (this may happen if length header + got corrupted or some writes did not make it to the WAL ~ truncation), + then remove all the lines starting from the corrupted one and restart + Tendermint. + +``` +$EDITOR /tmp/corrupted_wal +``` + 5. After editing, convert this file back into binary form by running: + +``` +./scripts/json2wal/json2wal /tmp/corrupted_wal > "$TMHOME/data/cs.wal/wal" +``` + ## Hardware ### Processor and Memory diff --git a/docs/specification/secure-p2p.rst b/docs/tendermint-core/secure-p2p.md similarity index 72% rename from docs/specification/secure-p2p.rst rename to docs/tendermint-core/secure-p2p.md index de95f0cf0..aad5eac41 100644 --- a/docs/specification/secure-p2p.rst +++ b/docs/tendermint-core/secure-p2p.md @@ -1,12 +1,11 @@ -Secure P2P -========== +# Secure P2P The Tendermint p2p protocol uses an authenticated encryption scheme -based on the `Station-to-Station -Protocol `__. +based on the [Station-to-Station +Protocol](https://en.wikipedia.org/wiki/Station-to-Station_protocol). The implementation uses -`golang's `__ `nacl -box `__ for the actual authenticated +[golang's](https://godoc.org/golang.org/x/crypto/nacl/box) [nacl +box](http://nacl.cr.yp.to/box.html) for the actual authenticated encryption algorithm. Each peer generates an ED25519 key-pair to use as a persistent @@ -19,10 +18,9 @@ their respective ephemeral public keys. This happens in the clear. They then each compute the shared secret. The shared secret is the multiplication of the peer's ephemeral private key by the other peer's ephemeral public key. The result is the same for both peers by the magic -of `elliptic -curves `__. -The shared secret is used as the symmetric key for the encryption -algorithm. 
+of [elliptic +curves](https://en.wikipedia.org/wiki/Elliptic_curve_cryptography). The +shared secret is used as the symmetric key for the encryption algorithm. The two ephemeral public keys are sorted to establish a canonical order. Then a 24-byte nonce is generated by concatenating the public keys and @@ -52,8 +50,7 @@ time it is used. The communications maintain Perfect Forward Secrecy, as the persistent key pair was not used for generating secrets - only for authenticating. -Caveat ------- +## Caveat This system is still vulnerable to a Man-In-The-Middle attack if the persistent public key of the remote node is not known in advance. The @@ -62,17 +59,15 @@ such as the Web-of-Trust or Certificate Authorities. In our case, we can use the blockchain itself as a certificate authority to ensure that we are connected to at least one validator. -Config ------- +## Config Authenticated encryption is enabled by default. -Additional Reading ------------------- +## Additional Reading -- `Implementation `__ -- `Original STS paper by Whitfield Diffie, Paul C. van Oorschot and - Michael J. - Wiener `__ -- `Further work on secret - handshakes `__ +- [Implementation](https://github.com/tendermint/tendermint/blob/64bae01d007b5bee0d0827ab53259ffd5910b4e6/p2p/conn/secret_connection.go#L47) +- [Original STS paper by Whitfield Diffie, Paul C. van Oorschot and + Michael J. 
+ Wiener](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.216.6107&rep=rep1&type=pdf) +- [Further work on secret + handshakes](https://dominictarr.github.io/secret-handshake-paper/shs.pdf) diff --git a/docs/tendermint-core/using-tendermint.md b/docs/tendermint-core/using-tendermint.md index 21280b97b..11949c798 100644 --- a/docs/tendermint-core/using-tendermint.md +++ b/docs/tendermint-core/using-tendermint.md @@ -31,6 +31,73 @@ For more elaborate initialization, see the tesnet command: tendermint testnet --help ``` +### Genesis + +The `genesis.json` file in `$TMHOME/config/` defines the initial +TendermintCore state upon genesis of the blockchain ([see +definition](https://github.com/tendermint/tendermint/blob/master/types/genesis.go)). + +#### Fields + +- `genesis_time`: Official time of blockchain start. +- `chain_id`: ID of the blockchain. This must be unique for + every blockchain. If your testnet blockchains do not have unique + chain IDs, you will have a bad time. +- `validators`: +- `pub_key`: The first element specifies the `pub_key` type. 1 + == Ed25519. The second element are the pubkey bytes. +- `power`: The validator's voting power. +- `name`: Name of the validator (optional). +- `app_hash`: The expected application hash (as returned by the + `ResponseInfo` ABCI message) upon genesis. If the app's hash does + not match, Tendermint will panic. +- `app_state`: The application state (e.g. initial distribution + of tokens). 
+ +#### Sample genesis.json + +``` +{ + "genesis_time": "2018-07-09T22:43:06.255718641Z", + "chain_id": "chain-IAkWsK", + "validators": [ + { + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "oX8HhKsErMluxI0QWNSR8djQMSupDvHdAYrHwP7n73k=" + }, + "power": "1", + "name": "node0" + }, + { + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "UZNSJA9zmeFQj36Rs296lY+WFQ4Rt6s7snPpuKypl5I=" + }, + "power": "1", + "name": "node1" + }, + { + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "i9GrM6/MHB4zjCelMZBUYHNXYIzl4n0RkDCVmmLhS/o=" + }, + "power": "1", + "name": "node2" + }, + { + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "0qq7954l87trEqbQV9c7d1gurnjTGMxreXc848ZZ5aw=" + }, + "power": "1", + "name": "node3" + } + ], + "app_hash": "" +} +``` + ## Run To run a Tendermint node, use diff --git a/docs/specification/validators.rst b/docs/tendermint-core/validators.md similarity index 58% rename from docs/specification/validators.rst rename to docs/tendermint-core/validators.md index 085994f3d..0c1d7d89a 100644 --- a/docs/specification/validators.rst +++ b/docs/tendermint-core/validators.md @@ -1,5 +1,4 @@ -Validators -========== +# Validators Validators are responsible for committing new blocks in the blockchain. These validators participate in the consensus protocol by broadcasting @@ -19,25 +18,22 @@ to post any collateral at all. Validators have a cryptographic key-pair and an associated amount of "voting power". Voting power need not be the same. -Becoming a Validator --------------------- +## Becoming a Validator There are two ways to become validator. -1. They can be pre-established in the `genesis - state <./genesis.html>`__ -2. The ABCI app responds to the EndBlock message with changes to the - existing validator set. +1. They can be pre-established in the [genesis state](../../tendermint-core/using-tendermint.md#genesis) +2. 
The ABCI app responds to the EndBlock message with changes to the + existing validator set. -Committing a Block ------------------- +## Committing a Block *+2/3 is short for "more than 2/3"* -A block is committed when +2/3 of the validator set sign `precommit -votes <./block-structure.html#vote>`__ for that block at the same -``round``. The +2/3 set of precommit votes is -called a `*commit* <./block-structure.html#commit>`__. While any -+2/3 set of precommits for the same block at the same height&round can -serve as validation, the canonical commit is included in the next block -(see `LastCommit <./block-structure.html>`__). +A block is committed when +2/3 of the validator set sign [precommit +votes](../spec/blockchain/blockchain.md#vote) for that block at the same `round`. +The +2/3 set of precommit votes is called a +[*commit*](../spec/blockchain/blockchain.md#commit). While any +2/3 set of +precommits for the same block at the same height&round can serve as +validation, the canonical commit is included in the next block (see +[LastCommit](../spec/blockchain/blockchain.md#last-commit)). 
diff --git a/scripts/slate.sh b/scripts/slate.sh deleted file mode 100644 index e18babea7..000000000 --- a/scripts/slate.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -if [ "$CIRCLE_BRANCH" == "" ]; then - echo "this script is meant to be run on CircleCI, exiting" - echo 1 -fi - -# check for changes in the `rpc/core` directory -did_rpc_change=$(git diff --name-status $CIRCLE_BRANCH origin/master | grep rpc/core) - -if [ "$did_rpc_change" == "" ]; then - echo "no changes detected in rpc/core, exiting" - exit 0 -else - echo "changes detected in rpc/core, continuing" -fi - -# only run this script on changes to rpc/core committed to develop -if [ "$CIRCLE_BRANCH" != "master" ]; then - echo "the branch being built isn't master, exiting" - exit 0 -else - echo "on master, building the RPC docs" -fi - -# godoc2md used to convert the go documentation from -# `rpc/core` into a markdown file consumed by Slate -go get github.com/davecheney/godoc2md - -# slate works via forks, and we'll be committing to -# master branch, which will trigger our fork to run -# the `./deploy.sh` and publish via the `gh-pages` branch -slate_repo=github.com/tendermint/slate -slate_path="$GOPATH"/src/"$slate_repo" - -if [ ! 
-d "$slate_path" ]; then - git clone https://"$slate_repo".git $slate_path -fi - -# the main file we need to update if rpc/core changed -destination="$slate_path"/source/index.html.md - -# we remove it then re-create it with the latest changes -rm $destination - -header="--- -title: RPC Reference - -language_tabs: - - shell - - go - -toc_footers: - - Tendermint - - Documentation Powered by Slate - -search: true ----" - -# write header to the main slate file -echo "$header" > "$destination" - -# generate a markdown from the godoc comments, using a template -rpc_docs=$(godoc2md -template rpc/core/doc_template.txt github.com/tendermint/tendermint/rpc/core | grep -v -e "pipe.go" -e "routes.go" -e "dev.go" | sed 's$/src/target$https://github.com/tendermint/tendermint/tree/master/rpc/core$') - -# append core RPC docs -echo "$rpc_docs" >> "$destination" - -# commit the changes -cd $slate_path - -git config --global user.email "github@tendermint.com" -git config --global user.name "tenderbot" - -git commit -a -m "Update tendermint RPC docs via CircleCI" -git push -q https://${GITHUB_ACCESS_TOKEN}@github.com/tendermint/slate.git master diff --git a/tools/tm-bench/Gopkg.lock b/tools/tm-bench/Gopkg.lock index 175acb3a6..aa1d819ca 100644 --- a/tools/tm-bench/Gopkg.lock +++ b/tools/tm-bench/Gopkg.lock @@ -119,7 +119,7 @@ "prometheus", "prometheus/promhttp" ] - revision = "ae27198cdd90bf12cd134ad79d1366a6cf49f632" + revision = "ee1c9d7e23df7f011bdf6f12a5c9e7f0ae10a1fe" [[projects]] branch = "master" @@ -237,7 +237,7 @@ "types", "version" ] - revision = "9d81a74429e093f3167875e0145ad957874c77d1" + revision = "f5ad8ef8600c33532a16de0879ff6b9745bb394d" [[projects]] branch = "master" @@ -268,7 +268,7 @@ "netutil", "trace" ] - revision = "292b43bbf7cb8d35ddf40f8d5100ef3837cced3f" + revision = "039a4258aec0ad3c79b905677cceeab13b296a77" [[projects]] name = "golang.org/x/text" @@ -292,10 +292,9 @@ version = "v0.3.0" [[projects]] - branch = "master" name = "google.golang.org/genproto" 
packages = ["googleapis/rpc/status"] - revision = "e92b116572682a5b432ddd840aeaba2a559eeff1" + revision = "7fd901a49ba6a7f87732eb344f6e3c5b19d1b200" [[projects]] name = "google.golang.org/grpc" @@ -330,6 +329,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "bc54a74ffdfc09872726fcf5c72b5df882269dc1cd949ac3fbeac9a554fc25c6" + inputs-digest = "5c21a60b80ac7d60f7be693de13f9fadb62226b502431bdb38fb9794a98c5b02" solver-name = "gps-cdcl" solver-version = 1 diff --git a/tools/tm-bench/Gopkg.toml b/tools/tm-bench/Gopkg.toml index 18498cbbb..3b2dfa4ec 100644 --- a/tools/tm-bench/Gopkg.toml +++ b/tools/tm-bench/Gopkg.toml @@ -45,6 +45,11 @@ name = "github.com/tendermint/tendermint" branch = "develop" +# this got updated and broke, so locked to an old working commit ... +[[override]] + name = "google.golang.org/genproto" + revision = "7fd901a49ba6a7f87732eb344f6e3c5b19d1b200" + [prune] go-tests = true unused-packages = true diff --git a/tools/tm-bench/README.md b/tools/tm-bench/README.md index 811141629..000f20f37 100644 --- a/tools/tm-bench/README.md +++ b/tools/tm-bench/README.md @@ -51,15 +51,26 @@ with the last command being in a seperate window. ## How stats are collected These stats are derived by having each connection send transactions at the -specified rate (or as close as it can get) for the specified time. After the -specified time, it iterates over all of the blocks that were created in that -time. The average and stddev per second are computed based off of that, by +specified rate (or as close as it can get) for the specified time. +After the specified time, it iterates over all of the blocks that were created +in that time. +The average and stddev per second are computed based off of that, by grouping the data by second. To send transactions at the specified rate in each connection, we loop -through the number of transactions. If its too slow, the loop stops at one second. -If its too fast, we wait until the one second mark ends. 
The transactions per -second stat is computed based off of what ends up in the block. +through the number of transactions. +If its too slow, the loop stops at one second. +If its too fast, we wait until the one second mark ends. +The transactions per second stat is computed based off of what ends up in the +block. + +Note that there will be edge effects on the number of transactions in the first +and last blocks. +This is because transactions may start sending midway through when tendermint +starts building the next block, so it only has half as much time to gather txs +that tm-bench sends. +Similarly the end of the duration will likely end mid-way through tendermint +trying to build the next block. Each of the connections is handled via two separate goroutines. diff --git a/tools/tm-bench/main.go b/tools/tm-bench/main.go index 4bc67ab38..5f597cecb 100644 --- a/tools/tm-bench/main.go +++ b/tools/tm-bench/main.go @@ -7,6 +7,7 @@ import ( "math" "os" "strings" + "sync" "text/tabwriter" "time" @@ -25,13 +26,13 @@ type statistics struct { } func main() { - var duration, txsRate, connections, txSize int + var durationInt, txsRate, connections, txSize int var verbose bool var outputFormat, broadcastTxMethod string flagSet := flag.NewFlagSet("tm-bench", flag.ExitOnError) flagSet.IntVar(&connections, "c", 1, "Connections to keep open per endpoint") - flagSet.IntVar(&duration, "T", 10, "Exit after the specified amount of time in seconds") + flagSet.IntVar(&durationInt, "T", 10, "Exit after the specified amount of time in seconds") flagSet.IntVar(&txsRate, "r", 1000, "Txs per second to send in a connection") flagSet.IntVar(&txSize, "s", 250, "The size of a transaction in bytes.") flagSet.StringVar(&outputFormat, "output-format", "plain", "Output format: plain or json") @@ -42,7 +43,7 @@ func main() { fmt.Println(`Tendermint blockchain benchmarking tool. 
Usage: - tm-bench [-c 1] [-T 10] [-r 1000] [endpoints] [-output-format [-broadcast-tx-method ]] + tm-bench [-c 1] [-T 10] [-r 1000] [-s 250] [endpoints] [-output-format [-broadcast-tx-method ]] Examples: tm-bench localhost:26657`) @@ -73,7 +74,7 @@ Examples: } logger = log.NewTMLoggerWithColorFn(log.NewSyncWriter(os.Stdout), colorFn) - fmt.Printf("Running %ds test @ %s\n", duration, flagSet.Arg(0)) + fmt.Printf("Running %ds test @ %s\n", durationInt, flagSet.Arg(0)) } if broadcastTxMethod != "async" && @@ -93,10 +94,6 @@ Examples: ) logger.Info("Latest block height", "h", initialHeight) - // record time start - timeStart := time.Now() - logger.Info("Time started", "t", timeStart) - transacters := startTransacters( endpoints, connections, @@ -104,9 +101,17 @@ Examples: txSize, "broadcast_tx_"+broadcastTxMethod, ) - endTime := time.Duration(duration) * time.Second - <-time.After(endTime) + // Wait until transacters have begun until we get the start time + timeStart := time.Now() + logger.Info("Time last transacter started", "t", timeStart) + + duration := time.Duration(durationInt) * time.Second + + timeEnd := timeStart.Add(duration) + logger.Info("End time for calculation", "t", timeEnd) + + <-time.After(duration) for i, t := range transacters { t.Stop() numCrashes := countCrashes(t.connsBroken) @@ -115,15 +120,14 @@ Examples: } } - timeStop := time.Now() - logger.Info("Time stopped", "t", timeStop) + logger.Debug("Time all transacters stopped", "t", time.Now()) stats, err := calculateStatistics( client, initialHeight, timeStart, - timeStop, - duration, + timeEnd, + durationInt, ) if err != nil { fmt.Fprintln(os.Stderr, err) @@ -202,7 +206,7 @@ func calculateStatistics( } // iterates from max height to min height - for _, blockMeta := range blockMetas { + for i, blockMeta := range blockMetas { // check if block was created after timeStart if blockMeta.Header.Time.Before(timeStart) { break @@ -219,6 +223,7 @@ func calculateStatistics( // increase number of txs for 
that second numTxsPerSec[sec] += blockMeta.Header.NumTxs + logger.Debug(fmt.Sprintf("%d txs in block %d, height %d", blockMeta.Header.NumTxs, i, blockMeta.Header.Height)) } for _, n := range numBlocksPerSec { @@ -245,15 +250,21 @@ func startTransacters( ) []*transacter { transacters := make([]*transacter, len(endpoints)) + wg := sync.WaitGroup{} + wg.Add(len(endpoints)) for i, e := range endpoints { t := newTransacter(e, connections, txsRate, txSize, broadcastTxMethod) t.SetLogger(logger) - if err := t.Start(); err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - transacters[i] = t + go func(i int) { + defer wg.Done() + if err := t.Start(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + transacters[i] = t + }(i) } + wg.Wait() return transacters } diff --git a/tools/tm-bench/transacter.go b/tools/tm-bench/transacter.go index de408136d..2834727b5 100644 --- a/tools/tm-bench/transacter.go +++ b/tools/tm-bench/transacter.go @@ -36,7 +36,8 @@ type transacter struct { conns []*websocket.Conn connsBroken []bool - wg sync.WaitGroup + startingWg sync.WaitGroup + endingWg sync.WaitGroup stopped bool logger log.Logger @@ -75,19 +76,22 @@ func (t *transacter) Start() error { t.conns[i] = c } - t.wg.Add(2 * t.Connections) + t.startingWg.Add(t.Connections) + t.endingWg.Add(2 * t.Connections) for i := 0; i < t.Connections; i++ { go t.sendLoop(i) go t.receiveLoop(i) } + t.startingWg.Wait() + return nil } // Stop closes the connections. func (t *transacter) Stop() { t.stopped = true - t.wg.Wait() + t.endingWg.Wait() for _, c := range t.conns { c.Close() } @@ -97,7 +101,7 @@ func (t *transacter) Stop() { // `broadcast_tx_async`). func (t *transacter) receiveLoop(connIndex int) { c := t.conns[connIndex] - defer t.wg.Done() + defer t.endingWg.Done() for { _, _, err := c.ReadMessage() if err != nil { @@ -118,6 +122,13 @@ func (t *transacter) receiveLoop(connIndex int) { // sendLoop generates transactions at a given rate. 
func (t *transacter) sendLoop(connIndex int) { + started := false + // Close the starting waitgroup, in the event that this fails to start + defer func() { + if !started { + t.startingWg.Done() + } + }() c := t.conns[connIndex] c.SetPingHandler(func(message string) error { @@ -139,7 +150,7 @@ func (t *transacter) sendLoop(connIndex int) { defer func() { pingsTicker.Stop() txsTicker.Stop() - t.wg.Done() + t.endingWg.Done() }() // hash of the host name is a part of each tx @@ -156,6 +167,10 @@ func (t *transacter) sendLoop(connIndex int) { startTime := time.Now() endTime := startTime.Add(time.Second) numTxSent := t.Rate + if !started { + t.startingWg.Done() + started = true + } for i := 0; i < t.Rate; i++ { // each transaction embeds connection index, tx number and hash of the hostname diff --git a/version/version.go b/version/version.go index f9faedf04..4e677b5f4 100644 --- a/version/version.go +++ b/version/version.go @@ -4,13 +4,13 @@ package version const ( Maj = "0" Min = "22" - Fix = "2" + Fix = "3" ) var ( // Version is the current version of Tendermint // Must be a string because scripts like dist.sh read this file. - Version = "0.22.2-dev" + Version = "0.22.3" // GitCommit is the current HEAD set using ldflags. GitCommit string