From 7a8aeff4b0f5fa7d9113315e2c2ccf64883a1f90 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Mon, 21 Jan 2019 10:02:57 -0500 Subject: [PATCH] update spec for Merkle RFC 6962 (#3175) * spec: specify when MerkleRoot is on hashes * remove unnecessary hash methods * update changelog * fix test --- CHANGELOG_PENDING.md | 5 ++++- docs/spec/blockchain/blockchain.md | 13 +++++++++---- docs/spec/blockchain/encoding.md | 16 +++++++++++++--- docs/spec/blockchain/state.md | 2 +- types/results.go | 11 +++-------- types/results_test.go | 19 ++++++++++--------- types/validator.go | 8 -------- 7 files changed, 40 insertions(+), 34 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index af1c5566b..5a425f8b3 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -12,9 +12,12 @@ Special thanks to external contributors on this release: * Go API - [node] \#3082 MetricsProvider now requires you to pass a chain ID +- [types] \#2713 Rename `TxProof.LeafHash` to `TxProof.Leaf` +- [crypto/merkle] \#2713 `SimpleProof.Verify` takes a `leaf` instead of a + `leafHash` and performs the hashing itself * Blockchain Protocol - * [merkle] \#2713 Merkle trees now match the RFC 6962 specification + * [crypto/merkle] \#2713 Merkle trees now match the RFC 6962 specification * [types] \#3078 Re-order Timestamp and BlockID in CanonicalVote so it's consistent with CanonicalProposal (BlockID comes first) diff --git a/docs/spec/blockchain/blockchain.md b/docs/spec/blockchain/blockchain.md index 92b55e35f..00cccfc2e 100644 --- a/docs/spec/blockchain/blockchain.md +++ b/docs/spec/blockchain/blockchain.md @@ -51,7 +51,7 @@ type Header struct { // hashes of block data LastCommitHash []byte // commit from validators from the last block - DataHash []byte // MerkleRoot of transactions + DataHash []byte // MerkleRoot of transaction hashes // hashes from the app output from the prev block ValidatorsHash []byte // validators for the current block @@ -303,7 +303,7 @@ The first block has 
`block.Header.LastBlockID == BlockID{}`. ### LastCommitHash ```go -block.Header.LastCommitHash == MerkleRoot(block.LastCommit) +block.Header.LastCommitHash == MerkleRoot(block.LastCommit.Precommits) ``` MerkleRoot of the votes included in the block. @@ -314,10 +314,15 @@ The first block has `block.Header.LastCommitHash == []byte{}` ### DataHash ```go -block.Header.DataHash == MerkleRoot(block.Txs.Txs) +block.Header.DataHash == MerkleRoot(Hashes(block.Txs.Txs)) ``` -MerkleRoot of the transactions included in the block. +MerkleRoot of the hashes of transactions included in the block. + +Note the transactions are hashed before being included in the Merkle tree, +so the leaves of the Merkle tree are the hashes, not the transactions +themselves. This is because transaction hashes are regularly used as identifiers for +transactions. ### ValidatorsHash diff --git a/docs/spec/blockchain/encoding.md b/docs/spec/blockchain/encoding.md index 9552ab073..1b999335b 100644 --- a/docs/spec/blockchain/encoding.md +++ b/docs/spec/blockchain/encoding.md @@ -213,7 +213,7 @@ func innerHash(left []byte, right []byte) []byte { // largest power of 2 less than k func getSplitPoint(k int) { ... } -func MerkleRoot(leafs [][]byte) []byte{ +func MerkleRoot(items [][]byte) []byte{ switch len(items) { case 0: return nil @@ -228,10 +228,20 @@ func MerkleRoot(leafs [][]byte) []byte{ } ``` +Note: `MerkleRoot` operates on items which are arbitrary byte arrays, not +necessarily hashes. For items which need to be hashed first, we introduce the +`Hashes` function: + +``` +func Hashes(items [][]byte) [][]byte { + return SHA256 of each item +} +``` + Note: we will abuse notation and invoke `MerkleRoot` with arguments of type `struct` or type `[]struct`. -For `struct` arguments, we compute a `[][]byte` containing the hash of each +For `struct` arguments, we compute a `[][]byte` containing the amino encoding of each field in the struct, in the same order the fields appear in the struct.
-For `[]struct` arguments, we compute a `[][]byte` by hashing the individual `struct` elements. +For `[]struct` arguments, we compute a `[][]byte` by amino encoding the individual `struct` elements. ### Simple Merkle Proof diff --git a/docs/spec/blockchain/state.md b/docs/spec/blockchain/state.md index ff6fcf2e4..7df096bc9 100644 --- a/docs/spec/blockchain/state.md +++ b/docs/spec/blockchain/state.md @@ -60,7 +60,7 @@ When hashing the Validator struct, the address is not included, because it is redundant with the pubkey. The `state.Validators`, `state.LastValidators`, and `state.NextValidators`, must always be sorted by validator address, -so that there is a canonical order for computing the SimpleMerkleRoot. +so that there is a canonical order for computing the MerkleRoot. We also define a `TotalVotingPower` function, to return the total voting power: diff --git a/types/results.go b/types/results.go index db7811684..d7d82d894 100644 --- a/types/results.go +++ b/types/results.go @@ -3,25 +3,20 @@ package types import ( abci "github.com/tendermint/tendermint/abci/types" "github.com/tendermint/tendermint/crypto/merkle" - "github.com/tendermint/tendermint/crypto/tmhash" cmn "github.com/tendermint/tendermint/libs/common" ) //----------------------------------------------------------------------------- // ABCIResult is the deterministic component of a ResponseDeliverTx.
-// TODO: add Tags +// TODO: add tags and other fields +// https://github.com/tendermint/tendermint/issues/1007 type ABCIResult struct { Code uint32 `json:"code"` Data cmn.HexBytes `json:"data"` } -// Hash returns the canonical hash of the ABCIResult -func (a ABCIResult) Hash() []byte { - bz := tmhash.Sum(cdcEncode(a)) - return bz -} - +// Bytes returns the amino encoded ABCIResult func (a ABCIResult) Bytes() []byte { return cdcEncode(a) } diff --git a/types/results_test.go b/types/results_test.go index def042d50..a37de9ec4 100644 --- a/types/results_test.go +++ b/types/results_test.go @@ -16,20 +16,21 @@ func TestABCIResults(t *testing.T) { e := ABCIResult{Code: 14, Data: []byte("foo")} f := ABCIResult{Code: 14, Data: []byte("bar")} - // Nil and []byte{} should produce the same hash. - require.Equal(t, a.Hash(), a.Hash()) - require.Equal(t, b.Hash(), b.Hash()) - require.Equal(t, a.Hash(), b.Hash()) + // Nil and []byte{} should produce the same bytes + require.Equal(t, a.Bytes(), a.Bytes()) + require.Equal(t, b.Bytes(), b.Bytes()) + require.Equal(t, a.Bytes(), b.Bytes()) // a and b should be the same, don't go in results. results := ABCIResults{a, c, d, e, f} - // Make sure each result hashes properly. + // Make sure each result serializes differently var last []byte - for i, res := range results { - h := res.Hash() - assert.NotEqual(t, last, h, "%d", i) - last = h + assert.Equal(t, last, a.Bytes()) // first one is empty + for i, res := range results[1:] { + bz := res.Bytes() + assert.NotEqual(t, last, bz, "%d", i) + last = bz } // Make sure that we can get a root hash from results and verify proofs. 
diff --git a/types/validator.go b/types/validator.go index 1de326b00..0b8967b24 100644 --- a/types/validator.go +++ b/types/validator.go @@ -4,8 +4,6 @@ import ( "bytes" "fmt" - "github.com/tendermint/tendermint/crypto/tmhash" - "github.com/tendermint/tendermint/crypto" cmn "github.com/tendermint/tendermint/libs/common" ) @@ -70,12 +68,6 @@ func (v *Validator) String() string { v.ProposerPriority) } -// Hash computes the unique ID of a validator with a given voting power. -// It excludes the ProposerPriority value, which changes with every round. -func (v *Validator) Hash() []byte { - return tmhash.Sum(v.Bytes()) -} - // Bytes computes the unique encoding of a validator with a given voting power. // These are the bytes that get hashed in consensus. It excludes address // as it is redundant with the pubkey. This also excludes ProposerPriority