diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 19e106238..392b61da1 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -7,7 +7,6 @@ Special thanks to external contributors on this release: ### BREAKING CHANGES: * CLI/RPC/Config -- [types] consistent field order of `CanonicalVote` and `CanonicalProposal` * Apps @@ -16,6 +15,11 @@ Special thanks to external contributors on this release: * Blockchain Protocol * [merkle] \#2713 Merkle trees now match the RFC 6962 specification + * [types] \#3078 Re-order Timestamp and BlockID in CanonicalVote so it's + consistent with CanonicalProposal (BlockID comes + first) + * [types] \#3165 Hash of ConsensusParams only includes BlockSize.MaxBytes and + BlockSize.MaxGas * P2P Protocol - [consensus] \#2960 normalize priorities to not exceed `2*TotalVotingPower` to mitigate unfair proposer selection @@ -24,8 +28,8 @@ Special thanks to external contributors on this release: ### FEATURES: ### IMPROVEMENTS: -- [rpc] \#3065 return maxPerPage (100), not defaultPerPage (30) if `per_page` is greater than the max 100. -- [instrumentation] \#3082 add 'chain_id' label for all metrics +- [rpc] \#3065 Return maxPerPage (100), not defaultPerPage (30) if `per_page` is greater than the max 100. 
+- [instrumentation] \#3082 Add `chain_id` label for all metrics ### BUG FIXES: - [log] \#3060 Fix year format diff --git a/docs/spec/blockchain/blockchain.md b/docs/spec/blockchain/blockchain.md index f80c8c05f..92b55e35f 100644 --- a/docs/spec/blockchain/blockchain.md +++ b/docs/spec/blockchain/blockchain.md @@ -51,7 +51,7 @@ type Header struct { // hashes of block data LastCommitHash []byte // commit from validators from the last block - DataHash []byte // Merkle root of transactions + DataHash []byte // MerkleRoot of transactions // hashes from the app output from the prev block ValidatorsHash []byte // validators for the current block @@ -83,25 +83,27 @@ type Version struct { ## BlockID The `BlockID` contains two distinct Merkle roots of the block. -The first, used as the block's main hash, is the Merkle root -of all the fields in the header. The second, used for secure gossipping of -the block during consensus, is the Merkle root of the complete serialized block -cut into parts. The `BlockID` includes these two hashes, as well as the number of -parts. +The first, used as the block's main hash, is the MerkleRoot +of all the fields in the header (ie. `MerkleRoot(header)`). +The second, used for secure gossipping of the block during consensus, +is the MerkleRoot of the complete serialized block +cut into parts (ie. `MerkleRoot(MakeParts(block))`). +The `BlockID` includes these two hashes, as well as the number of +parts (ie. `len(MakeParts(block))`). ```go type BlockID struct { Hash []byte - Parts PartsHeader + PartsHeader PartSetHeader } -type PartsHeader struct { - Hash []byte +type PartSetHeader struct { Total int32 + Hash []byte } ``` -TODO: link to details of merkle sums. +See [MerkleRoot](/docs/spec/blockchain/encoding.md#MerkleRoot) for details. ## Time @@ -142,12 +144,12 @@ The vote includes information about the validator signing it. 
```go type Vote struct { - Type SignedMsgType // byte + Type byte Height int64 Round int - Timestamp time.Time BlockID BlockID - ValidatorAddress Address + Timestamp Time + ValidatorAddress []byte ValidatorIndex int Signature []byte } @@ -160,8 +162,8 @@ a _precommit_ has `vote.Type == 2`. ## Signature Signatures in Tendermint are raw bytes representing the underlying signature. -The only signature scheme currently supported for Tendermint validators is -ED25519. The signature is the raw 64-byte ED25519 signature. + +See the [signature spec](/docs/spec/blockchain/encoding.md#key-types) for more. ## EvidenceData @@ -188,6 +190,8 @@ type DuplicateVoteEvidence struct { } ``` +See the [pubkey spec](/docs/spec/blockchain/encoding.md#key-types) for more. + ## Validation Here we describe the validation rules for every element in a block. @@ -205,7 +209,7 @@ the current version of the `state` corresponds to the state after executing transactions from the `prevBlock`. Elements of an object are accessed as expected, ie. `block.Header`. -See [here](https://github.com/tendermint/tendermint/blob/master/docs/spec/blockchain/state.md) for the definition of `state`. +See the [definition of `State`](/docs/spec/blockchain/state.md). ### Header @@ -284,28 +288,25 @@ The first block has `block.Header.TotalTxs = block.Header.NumberTxs`. LastBlockID is the previous block's BlockID: ```go -prevBlockParts := MakeParts(prevBlock, state.LastConsensusParams.BlockGossip.BlockPartSize) +prevBlockParts := MakeParts(prevBlock) block.Header.LastBlockID == BlockID { - Hash: SimpleMerkleRoot(prevBlock.Header), + Hash: MerkleRoot(prevBlock.Header), PartsHeader{ - Hash: SimpleMerkleRoot(prevBlockParts), + Hash: MerkleRoot(prevBlockParts), Total: len(prevBlockParts), }, } ``` -Note: it depends on the ConsensusParams, -which are held in the `state` and may be updated by the application. - The first block has `block.Header.LastBlockID == BlockID{}`. 
### LastCommitHash ```go -block.Header.LastCommitHash == SimpleMerkleRoot(block.LastCommit) +block.Header.LastCommitHash == MerkleRoot(block.LastCommit) ``` -Simple Merkle root of the votes included in the block. +MerkleRoot of the votes included in the block. These are the votes that committed the previous block. The first block has `block.Header.LastCommitHash == []byte{}` @@ -313,37 +314,37 @@ The first block has `block.Header.LastCommitHash == []byte{}` ### DataHash ```go -block.Header.DataHash == SimpleMerkleRoot(block.Txs.Txs) +block.Header.DataHash == MerkleRoot(block.Txs.Txs) ``` -Simple Merkle root of the transactions included in the block. +MerkleRoot of the transactions included in the block. ### ValidatorsHash ```go -block.ValidatorsHash == SimpleMerkleRoot(state.Validators) +block.ValidatorsHash == MerkleRoot(state.Validators) ``` -Simple Merkle root of the current validator set that is committing the block. +MerkleRoot of the current validator set that is committing the block. This can be used to validate the `LastCommit` included in the next block. ### NextValidatorsHash ```go -block.NextValidatorsHash == SimpleMerkleRoot(state.NextValidators) +block.NextValidatorsHash == MerkleRoot(state.NextValidators) ``` -Simple Merkle root of the next validator set that will be the validator set that commits the next block. +MerkleRoot of the next validator set that will be the validator set that commits the next block. This is included so that the current validator set gets a chance to sign the next validator sets Merkle root. -### ConsensusParamsHash +### ConsensusHash ```go -block.ConsensusParamsHash == TMHASH(amino(state.ConsensusParams)) +block.ConsensusHash == state.ConsensusParams.Hash() ``` -Hash of the amino-encoded consensus parameters. +Hash of the amino-encoding of a subset of the consensus parameters. ### AppHash @@ -358,20 +359,20 @@ The first block has `block.Header.AppHash == []byte{}`. 
### LastResultsHash ```go -block.ResultsHash == SimpleMerkleRoot(state.LastResults) +block.ResultsHash == MerkleRoot(state.LastResults) ``` -Simple Merkle root of the results of the transactions in the previous block. +MerkleRoot of the results of the transactions in the previous block. The first block has `block.Header.ResultsHash == []byte{}`. ## EvidenceHash ```go -block.EvidenceHash == SimpleMerkleRoot(block.Evidence) +block.EvidenceHash == MerkleRoot(block.Evidence) ``` -Simple Merkle root of the evidence of Byzantine behaviour included in this block. +MerkleRoot of the evidence of Byzantine behaviour included in this block. ### ProposerAddress diff --git a/docs/spec/blockchain/encoding.md b/docs/spec/blockchain/encoding.md index aefe1e7f7..9552ab073 100644 --- a/docs/spec/blockchain/encoding.md +++ b/docs/spec/blockchain/encoding.md @@ -30,6 +30,12 @@ For example, the byte-array `[0xA, 0xB]` would be encoded as `0x020A0B`, while a byte-array containing 300 entires beginning with `[0xA, 0xB, ...]` would be encoded as `0xAC020A0B...` where `0xAC02` is the UVarint encoding of 300. +## Hashing + +Tendermint uses `SHA256` as its hash function. +Objects are always Amino encoded before being hashed. +So `SHA256(obj)` is short for `SHA256(AminoEncode(obj))`. + ## Public Key Cryptography Tendermint uses Amino to distinguish between different types of private keys, @@ -68,23 +74,27 @@ For example, the 33-byte (or 0x21-byte in hex) Secp256k1 pubkey would be encoded as `EB5AE98721020BD40F225A57ED383B440CF073BC5539D0341F5767D2BF2D78406D00475A2EE9` -### Addresses +### Key Types -Addresses for each public key types are computed as follows: +Each type specifies its own pubkey, address, and signature format. 
#### Ed25519 -First 20-bytes of the SHA256 hash of the raw 32-byte public key: +TODO: pubkey + +The address is the first 20-bytes of the SHA256 hash of the raw 32-byte public key: ``` address = SHA256(pubkey)[:20] ``` -NOTE: before v0.22.0, this was the RIPEMD160 of the Amino encoded public key. +The signature is the raw 64-byte ED25519 signature. #### Secp256k1 -RIPEMD160 hash of the SHA256 hash of the OpenSSL compressed public key: +TODO: pubkey + +The address is the RIPEMD160 hash of the SHA256 hash of the OpenSSL compressed public key: ``` address = RIPEMD160(SHA256(pubkey)) @@ -92,12 +102,21 @@ address = RIPEMD160(SHA256(pubkey)) This is the same as Bitcoin. +The signature is the 64-byte concatenation of ECDSA `r` and `s` (ie. `r || s`), +where `s` is lexicographically less than its inverse, to prevent malleability. +This is like Ethereum, but without the extra byte for pubkey recovery, since +Tendermint assumes the pubkey is always provided anyway. + +#### Multisig + +TODO + ## Other Common Types ### BitArray -The BitArray is used in block headers and some consensus messages to signal -whether or not something was done by each validator. BitArray is represented +The BitArray is used in some consensus messages to represent votes received from +validators, or parts received in a block. It is represented with a struct containing the number of bits (`Bits`) and the bit-array itself encoded in base64 (`Elems`). @@ -119,24 +138,27 @@ representing `1` and `0`. Ie. the BitArray `10110` would be JSON encoded as Part is used to break up blocks into pieces that can be gossiped in parallel and securely verified using a Merkle tree of the parts. -Part contains the index of the part in the larger set (`Index`), the actual -underlying data of the part (`Bytes`), and a simple Merkle proof that the part is contained in -the larger set (`Proof`). 
+Part contains the index of the part (`Index`), the actual +underlying data of the part (`Bytes`), and a Merkle proof that the part is contained in +the set (`Proof`). ```go type Part struct { Index int - Bytes byte[] - Proof byte[] + Bytes []byte + Proof SimpleProof } ``` +See details of SimpleProof, below. + ### MakeParts Encode an object using Amino and slice it into parts. +Tendermint uses a part size of 65536 bytes. ```go -func MakeParts(obj interface{}, partSize int) []Part +func MakeParts(block Block) []Part ``` ## Merkle Trees @@ -144,12 +166,12 @@ func MakeParts(obj interface{}, partSize int) []Part For an overview of Merkle trees, see [wikipedia](https://en.wikipedia.org/wiki/Merkle_tree) -We use the RFC 6962 specification of a merkle tree, instantiated with sha256 as the hash function. +We use the RFC 6962 specification of a merkle tree, with sha256 as the hash function. Merkle trees are used throughout Tendermint to compute a cryptographic digest of a data structure. The differences between RFC 6962 and the simplest form a merkle tree are that: -1) leaf nodes and inner nodes have different hashes. - This is to prevent a proof to an inner node, claiming that it is the hash of the leaf. +1) leaf nodes and inner nodes have different hashes. + This is for "second pre-image resistance", to prevent the proof to an inner node being valid as the proof of a leaf. The leaf nodes are `SHA256(0x00 || leaf_data)`, and inner nodes are `SHA256(0x01 || left_hash || right_hash)`. 2) When the number of items isn't a power of two, the left half of the tree is as big as it could be. 
@@ -173,46 +195,64 @@ The differences between RFC 6962 and the simplest form a merkle tree are that: h0 h1 h2 h3 h0 h1 h2 h3 h4 h5 ``` -### Simple Merkle Root +### MerkleRoot The function `MerkleRoot` is a simple recursive function defined as follows: ```go -func MerkleRootFromLeafs(leafs [][]byte) []byte{ +// SHA256(0x00 || leaf) +func leafHash(leaf []byte) []byte { + return tmhash.Sum(append(0x00, leaf...)) +} + +// SHA256(0x01 || left || right) +func innerHash(left []byte, right []byte) []byte { + return tmhash.Sum(append(0x01, append(left, right...)...)) +} + +// largest power of 2 less than k +func getSplitPoint(k int) { ... } + +func MerkleRoot(leafs [][]byte) []byte{ switch len(items) { case 0: return nil case 1: - return leafHash(leafs[0]) // SHA256(0x00 || leafs[0]) + return leafHash(leafs[0]) default: - k := getSplitPoint(len(items)) // largest power of two smaller than items - left := MerkleRootFromLeafs(items[:k]) - right := MerkleRootFromLeafs(items[k:]) - return innerHash(left, right) // SHA256(0x01 || left || right) + k := getSplitPoint(len(items)) + left := MerkleRoot(items[:k]) + right := MerkleRoot(items[k:]) + return innerHash(left, right) } } ``` -Note: we will abuse notion and invoke `SimpleMerkleRoot` with arguments of type `struct` or type `[]struct`. +Note: we will abuse notation and invoke `MerkleRoot` with arguments of type `struct` or type `[]struct`. For `struct` arguments, we compute a `[][]byte` containing the hash of each field in the struct, in the same order the fields appear in the struct. For `[]struct` arguments, we compute a `[][]byte` by hashing the individual `struct` elements. 
### Simple Merkle Proof -Proof that a leaf is in a Merkle tree consists of a simple structure: +Proof that a leaf is in a Merkle tree is composed as follows: ```golang type SimpleProof struct { + Total int + Index int + LeafHash []byte Aunts [][]byte } ``` -Which is verified using the following: +Which is verified as follows: ```golang -func (proof SimpleProof) Verify(index, total int, leafHash, rootHash []byte) bool { - computedHash := computeHashFromAunts(index, total, leafHash, proof.Aunts) +func (proof SimpleProof) Verify(rootHash []byte, leaf []byte) bool { + assert(proof.LeafHash == leafHash(leaf)) + + computedHash := computeHashFromAunts(proof.Index, proof.Total, proof.LeafHash, proof.Aunts) return computedHash == rootHash } @@ -230,22 +270,14 @@ func computeHashFromAunts(index, total int, leafHash []byte, innerHashes [][]byt if index < numLeft { leftHash := computeHashFromAunts(index, numLeft, leafHash, innerHashes[:len(innerHashes)-1]) assert(leftHash != nil) - return SimpleHashFromTwoHashes(leftHash, innerHashes[len(innerHashes)-1]) + return innerHash(leftHash, innerHashes[len(innerHashes)-1]) } rightHash := computeHashFromAunts(index-numLeft, total-numLeft, leafHash, innerHashes[:len(innerHashes)-1]) assert(rightHash != nil) - return SimpleHashFromTwoHashes(innerHashes[len(innerHashes)-1], rightHash) + return innerHash(innerHashes[len(innerHashes)-1], rightHash) } ``` -### Simple Tree with Dictionaries - -The Simple Tree is used to merkelize a list of items, so to merkelize a -(short) dictionary of key-value pairs, encode the dictionary as an -ordered list of `KVPair` structs. The block hash is such a hash -derived from all the fields of the block `Header`. The state hash is -similarly derived. - ### IAVL+ Tree Because Tendermint only uses a Simple Merkle Tree, application developers are expect to use their own Merkle tree in their applications. 
For example, the IAVL+ Tree - an immutable self-balancing binary tree for persisting application state is used by the [Cosmos SDK](https://github.com/cosmos/cosmos-sdk/blob/develop/docs/sdk/core/multistore.md) @@ -297,4 +329,6 @@ type CanonicalVote struct { The field ordering and the fixed sized encoding for the first three fields is optimized to ease parsing of SignBytes in HSMs. It creates fixed offsets for relevant fields that need to be read in this context. -See [#1622](https://github.com/tendermint/tendermint/issues/1622) for more details. +For more details, see the [signing spec](/docs/spec/consensus/signing.md). +Also, see the motivating discussion in +[#1622](https://github.com/tendermint/tendermint/issues/1622). diff --git a/docs/spec/blockchain/state.md b/docs/spec/blockchain/state.md index 0b46e5035..ff6fcf2e4 100644 --- a/docs/spec/blockchain/state.md +++ b/docs/spec/blockchain/state.md @@ -78,6 +78,8 @@ func TotalVotingPower(vals []Validators) int64{ ConsensusParams define various limits for blockchain data structures. Like validator sets, they are set during genesis and can be updated by the application through ABCI. +When hashed, only a subset of the params is included, to allow the params to +evolve without breaking the header. ```go type ConsensusParams struct { @@ -86,6 +88,18 @@ type ConsensusParams struct { Validator } +type hashedParams struct { + BlockMaxBytes int64 + BlockMaxGas int64 +} + +func (params ConsensusParams) Hash() []byte { + return SHA256(hashedParams{ + BlockMaxBytes: params.BlockSize.MaxBytes, + BlockMaxGas: params.BlockSize.MaxGas, + }) +} + type BlockSize struct { MaxBytes int64 MaxGas int64 diff --git a/types/params.go b/types/params.go index 91079e76b..03e43c191 100644 --- a/types/params.go +++ b/types/params.go @@ -22,6 +22,14 @@ type ConsensusParams struct { Validator ValidatorParams `json:"validator"` } +// HashedParams is a subset of ConsensusParams. +// It is amino encoded and hashed into +// the Header.ConsensusHash. 
+type HashedParams struct { + BlockMaxBytes int64 + BlockMaxGas int64 +} + // BlockSizeParams define limits on the block size. type BlockSizeParams struct { MaxBytes int64 `json:"max_bytes"` @@ -116,13 +124,16 @@ func (params *ConsensusParams) Validate() error { return nil } -// Hash returns a hash of the parameters to store in the block header -// No Merkle tree here, only three values are hashed here -// thus benefit from saving space < drawbacks from proofs' overhead -// Revisit this function if new fields are added to ConsensusParams +// Hash returns a hash of a subset of the parameters to store in the block header. +// Only BlockSize.MaxBytes and BlockSize.MaxGas are included in the hash. +// This allows the ConsensusParams to evolve more without breaking the block +// protocol. No need for a Merkle tree here, just a small struct to hash. func (params *ConsensusParams) Hash() []byte { hasher := tmhash.New() - bz := cdcEncode(params) + bz := cdcEncode(HashedParams{ + params.BlockSize.MaxBytes, + params.BlockSize.MaxGas, + }) if bz == nil { panic("cannot fail to encode ConsensusParams") }