From f2119c35de4118fbe6b45e578e9b6cc72ad8b3cb Mon Sep 17 00:00:00 2001 From: Sean Braithwaite Date: Mon, 15 Apr 2019 16:38:45 +0200 Subject: [PATCH 1/8] adr: PeerBehaviour updates (#3558) * [adr] Peer behaviour adr updates * [docs] fix Behaved function signature * [adr] typo fix in code example --- ...behaviour.md => adr-039-peer-behaviour.md} | 84 +++++++++++-------- 1 file changed, 49 insertions(+), 35 deletions(-) rename docs/architecture/{adr-037-peer-behaviour.md => adr-039-peer-behaviour.md} (57%) diff --git a/docs/architecture/adr-037-peer-behaviour.md b/docs/architecture/adr-039-peer-behaviour.md similarity index 57% rename from docs/architecture/adr-037-peer-behaviour.md rename to docs/architecture/adr-039-peer-behaviour.md index 36b024482..4ad051a35 100644 --- a/docs/architecture/adr-037-peer-behaviour.md +++ b/docs/architecture/adr-039-peer-behaviour.md @@ -1,7 +1,8 @@ -# ADR 037: Peer Behaviour Interface +# ADR 039: Peer Behaviour Interface ## Changelog * 07-03-2019: Initial draft +* 14-03-2019: Updates from feedback ## Context @@ -19,36 +20,46 @@ and ties up the reactors in a larger dependency graph when testing. Introduce a `PeerBehaviour` interface and concrete implementations which provide methods for reactors to signal peer behaviour without direct -coupling `p2p.Switch`. Introduce a ErrPeer to provide -concrete reasons for stopping peers. +coupling `p2p.Switch`. Introduce a ErrorBehaviourPeer to provide +concrete reasons for stopping peers. Introduce GoodBehaviourPeer to provide +concrete ways in which a peer contributes. ### Implementation Changes PeerBehaviour then becomes an interface for signaling peer errors as well as for marking peers as `good`. -XXX: It might be better to pass p2p.ID instead of the whole peer but as -a first draft maintain the underlying implementation as much as -possible. - ```go type PeerBehaviour interface { - Errored(peer Peer, reason ErrPeer) - MarkPeerAsGood(peer Peer) + Behaved(peer Peer, reason GoodBehaviourPeer) + Errored(peer Peer, reason ErrorBehaviourPeer) } ``` Instead of signaling peers to stop with arbitrary reasons: `reason interface{}` -We introduce a concrete error type ErrPeer: +We introduce a concrete error type ErrorBehaviourPeer: ```go -type ErrPeer int +type ErrorBehaviourPeer int const ( - ErrPeerUnknown = iota - ErrPeerBadMessage - ErrPeerMessageOutofOrder + ErrorBehaviourUnknown = iota + ErrorBehaviourBadMessage + ErrorBehaviourMessageOutofOrder + ... +) +``` + +To provide additional information on the ways a peer contributed, we introduce +the GoodBehaviourPeer type. + +```go +type GoodBehaviourPeer int + +const ( + GoodBehaviourVote = iota + GoodBehaviourBlockPart ... ) ``` @@ -60,11 +71,11 @@ type SwitchedPeerBehaviour struct { sw *Switch } -func (spb *SwitchedPeerBehaviour) Errored(peer Peer, reason ErrPeer) { +func (spb *SwitchedPeerBehaviour) Errored(peer Peer, reason ErrorBehaviourPeer) { spb.sw.StopPeerForError(peer, reason) } -func (spb *SwitchedPeerBehaviour) MarkPeerAsGood(peer Peer) { +func (spb *SwitchedPeerBehaviour) Behaved(peer Peer, reason GoodBehaviourPeer) { spb.sw.MarkPeerAsGood(peer) } @@ -75,51 +86,54 @@ func NewSwitchedPeerBehaviour(sw *Switch) *SwitchedPeerBehaviour { } ``` -Reactors, which are often difficult to unit test[2](#references). could use an implementation which exposes the signals produced by the reactor in +Reactors, which are often difficult to unit test[2](#references) could use an implementation which exposes the signals produced by the reactor in manufactured scenarios: ```go -type PeerErrors map[Peer][]ErrPeer -type GoodPeers map[Peer]bool +type ErrorBehaviours map[Peer][]ErrorBehaviourPeer +type GoodBehaviours map[Peer][]GoodBehaviourPeer type StorePeerBehaviour struct { - pe PeerErrors - gp GoodPeers + eb ErrorBehaviours + gb GoodBehaviours } func NewStorePeerBehaviour() *StorePeerBehaviour{ return &StorePeerBehaviour{ - pe: make(PeerErrors), - gp: GoodPeers{}, + eb: make(ErrorBehaviours), + gb: make(GoodBehaviours), } } -func (spb StorePeerBehaviour) Errored(peer Peer, reason ErrPeer) { - if _, ok := spb.pe[peer]; !ok { - spb.pe[peer] = []ErrPeer{reason} +func (spb StorePeerBehaviour) Errored(peer Peer, reason ErrorBehaviourPeer) { + if _, ok := spb.eb[peer]; !ok { + spb.eb[peer] = []ErrorBehaviours{reason} } else { - spb.pe[peer] = append(spb.pe[peer], reason) + spb.eb[peer] = append(spb.eb[peer], reason) } } -func (mpb *StorePeerBehaviour) GetPeerErrors() PeerErrors { - return mpb.pe +func (mpb *StorePeerBehaviour) GetErrored() ErrorBehaviours { + return mpb.eb } -func (spb *StorePeerBehaviour) MarkPeerAsGood(peer Peer) { - if _, ok := spb.gp[peer]; !ok { - spb.gp[peer] = true + +func (spb StorePeerBehaviour) Behaved(peer Peer, reason GoodBehaviourPeer) { + if _, ok := spb.gb[peer]; !ok { + spb.gb[peer] = []GoodBehaviourPeer{reason} + } else { + spb.gb[peer] = append(spb.gb[peer], reason) } } -func (spb *StorePeerBehaviour) GetGoodPeers() GoodPeers { - return spb.gp +func (spb *StorePeerBehaviour) GetBehaved() GoodBehaviours { + return spb.gb } ``` ## Status -Proposed +Accepted ## Consequences From 50b87c344503d07a12372b2fffece3ed66d915fe Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Mon, 15 Apr 2019 18:53:38 +0400 Subject: [PATCH 2/8] state: Use last height changed if validator set is empty (#3560) What happened: New code was supposed to fall back to last height changed when/if it failed to find validators at checkpoint height (to make release non-breaking). But because we did not check if validator set is empty, the fall back logic was never executed => resulting in LoadValidators returning an empty validator set for cases where `lastStoredHeight` is checkpoint height (i.e. almost all heights if the application does not change validator set often). How it was found: one of our users - @sunboshan reported a bug here https://github.com/tendermint/tendermint/pull/3537#issuecomment-482711833 * use last height changed in validator set is empty * add a changelog entry --- CHANGELOG_PENDING.md | 1 + state/store.go | 4 ++-- state/store_test.go | 34 +++++++++++++++++++++++++--------- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index bcb2af539..36c64be5e 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -19,3 +19,4 @@ ### IMPROVEMENTS: ### BUG FIXES: +- [state] [\#3537](https://github.com/tendermint/tendermint/pull/3537#issuecomment-482711833) LoadValidators: do not return an empty validator set diff --git a/state/store.go b/state/store.go index 73116b43f..0301bc7c3 100644 --- a/state/store.go +++ b/state/store.go @@ -193,7 +193,7 @@ func LoadValidators(db dbm.DB, height int64) (*types.ValidatorSet, error) { if valInfo.ValidatorSet == nil { lastStoredHeight := lastStoredHeightFor(height, valInfo.LastHeightChanged) valInfo2 := loadValidatorsInfo(db, lastStoredHeight) - if valInfo2 == nil { + if valInfo2 == nil || valInfo2.ValidatorSet == nil { // TODO (melekes): remove the below if condition in the 0.33 major // release and just panic. Old chains might panic otherwise if they // haven't saved validators at intermediate (%valSetCheckpointInterval) @@ -201,7 +201,7 @@ func LoadValidators(db dbm.DB, height int64) (*types.ValidatorSet, error) { // https://github.com/tendermint/tendermint/issues/3543 valInfo2 = loadValidatorsInfo(db, valInfo.LastHeightChanged) lastStoredHeight = valInfo.LastHeightChanged - if valInfo2 == nil { + if valInfo2 == nil || valInfo2.ValidatorSet == nil { panic( fmt.Sprintf("Couldn't find validators at height %d (height %d was originally requested)", lastStoredHeight, diff --git a/state/store_test.go b/state/store_test.go index dd48cae71..06adeefab 100644 --- a/state/store_test.go +++ b/state/store_test.go @@ -6,34 +6,50 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" cfg "github.com/tendermint/tendermint/config" dbm "github.com/tendermint/tendermint/libs/db" "github.com/tendermint/tendermint/types" ) -func TestSaveValidatorsInfo(t *testing.T) { - // test we persist validators every valSetCheckpointInterval blocks +func TestStoreLoadValidators(t *testing.T) { stateDB := dbm.NewMemDB() val, _ := types.RandValidator(true, 10) vals := types.NewValidatorSet([]*types.Validator{val}) - // TODO(melekes): remove in 0.33 release - // https://github.com/tendermint/tendermint/issues/3543 + // 1) LoadValidators loads validators using a height where they were last changed saveValidatorsInfo(stateDB, 1, 1, vals) saveValidatorsInfo(stateDB, 2, 1, vals) + loadedVals, err := LoadValidators(stateDB, 2) + require.NoError(t, err) + assert.NotZero(t, loadedVals.Size()) + + // 2) LoadValidators loads validators using a checkpoint height + + // TODO(melekes): REMOVE in 0.33 release + // https://github.com/tendermint/tendermint/issues/3543 + // for releases prior to v0.31.4, it uses last height changed + valInfo := &ValidatorsInfo{ + LastHeightChanged: valSetCheckpointInterval, + } + stateDB.Set(calcValidatorsKey(valSetCheckpointInterval), valInfo.Bytes()) assert.NotPanics(t, func() { - _, err := LoadValidators(stateDB, 2) + saveValidatorsInfo(stateDB, valSetCheckpointInterval+1, 1, vals) + loadedVals, err := LoadValidators(stateDB, valSetCheckpointInterval+1) if err != nil { - panic(err) + t.Fatal(err) + } + if loadedVals.Size() == 0 { + t.Fatal("Expected validators to be non-empty") } }) - //ENDREMOVE + // ENDREMOVE saveValidatorsInfo(stateDB, valSetCheckpointInterval, 1, vals) - loadedVals, err := LoadValidators(stateDB, valSetCheckpointInterval) - assert.NoError(t, err) + loadedVals, err = LoadValidators(stateDB, valSetCheckpointInterval) + require.NoError(t, err) assert.NotZero(t, loadedVals.Size()) } From f1cf10150a1a75832cb2c1fd3679d5beef25884f Mon Sep 17 00:00:00 2001 From: dongsamb Date: Tue, 16 Apr 2019 13:49:03 +0900 Subject: [PATCH 3/8] gitignore: add .vendor-new (#3566) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1cf9cdb90..10ee3099c 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ shunit2 addrbook.json */vendor +.vendor-new/ */.glide .terraform terraform.tfstate From 439312b9c0ac6a21cf9a0d08589b512c017954e1 Mon Sep 17 00:00:00 2001 From: zjubfd Date: Tue, 16 Apr 2019 15:54:19 +0800 Subject: [PATCH 4/8] blockchain: dismiss request channel delay (#3459) Fixes #3457 The topic of the issue is that : write a BlockRequest int requestsCh channel will create an timer at the same time that stop the peer 15s later if no block have been received . But pop a BlockRequest from requestsCh and send it out may delay more than 15s later. So that the peer will be stopped for error("send nothing to us"). Extracting requestsCh into its own goroutine can make sure that every BlockRequest been handled timely. Instead of the requestsCh handling, we should probably pull the didProcessCh handling in a separate go routine since this is the one "starving" the other channel handlers. I believe the way it is right now, we still have issues with high delays in errorsCh handling that might cause sending requests to invalid/ disconnected peers. --- blockchain/reactor.go | 55 +++++++++++++++++------------- libs/common/throttle_timer_test.go | 1 + libs/db/mem_batch.go | 4 +-- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/blockchain/reactor.go b/blockchain/reactor.go index 4a3f90493..139393778 100644 --- a/blockchain/reactor.go +++ b/blockchain/reactor.go @@ -228,32 +228,40 @@ func (bcR *BlockchainReactor) poolRoutine() { didProcessCh := make(chan struct{}, 1) -FOR_LOOP: - for { - select { - case request := <-bcR.requestsCh: - peer := bcR.Switch.Peers().Get(request.PeerID) - if peer == nil { - continue FOR_LOOP // Peer has since been disconnected. - } - msgBytes := cdc.MustMarshalBinaryBare(&bcBlockRequestMessage{request.Height}) - queued := peer.TrySend(BlockchainChannel, msgBytes) - if !queued { - // We couldn't make the request, send-queue full. - // The pool handles timeouts, just let it go. - continue FOR_LOOP - } + go func() { + for { + select { + case <-bcR.Quit(): + return + case <-bcR.pool.Quit(): + return + case request := <-bcR.requestsCh: + peer := bcR.Switch.Peers().Get(request.PeerID) + if peer == nil { + continue + } + msgBytes := cdc.MustMarshalBinaryBare(&bcBlockRequestMessage{request.Height}) + queued := peer.TrySend(BlockchainChannel, msgBytes) + if !queued { + bcR.Logger.Debug("Send queue is full, drop block request", "peer", peer.ID(), "height", request.Height) + } + case err := <-bcR.errorsCh: + peer := bcR.Switch.Peers().Get(err.peerID) + if peer != nil { + bcR.Switch.StopPeerForError(peer, err) + } - case err := <-bcR.errorsCh: - peer := bcR.Switch.Peers().Get(err.peerID) - if peer != nil { - bcR.Switch.StopPeerForError(peer, err) - } + case <-statusUpdateTicker.C: + // ask for status updates + go bcR.BroadcastStatusRequest() // nolint: errcheck - case <-statusUpdateTicker.C: - // ask for status updates - go bcR.BroadcastStatusRequest() // nolint: errcheck + } + } + }() +FOR_LOOP: + for { + select { case <-switchToConsensusTicker.C: height, numPending, lenRequesters := bcR.pool.GetStatus() outbound, inbound, _ := bcR.Switch.NumPeers() @@ -262,7 +270,6 @@ FOR_LOOP: if bcR.pool.IsCaughtUp() { bcR.Logger.Info("Time to switch to consensus reactor!", "height", height) bcR.pool.Stop() - conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor) if ok { conR.SwitchToConsensus(state, blocksSynced) diff --git a/libs/common/throttle_timer_test.go b/libs/common/throttle_timer_test.go index 00f5abdec..f5c9dfeff 100644 --- a/libs/common/throttle_timer_test.go +++ b/libs/common/throttle_timer_test.go @@ -6,6 +6,7 @@ import ( "time" // make govet noshadow happy... + asrt "github.com/stretchr/testify/assert" ) diff --git a/libs/db/mem_batch.go b/libs/db/mem_batch.go index ebba43f54..2ce765786 100644 --- a/libs/db/mem_batch.go +++ b/libs/db/mem_batch.go @@ -1,8 +1,6 @@ package db -import ( - "sync" -) +import "sync" type atomicSetDeleter interface { Mutex() *sync.Mutex From 5b8888b01b7542cd6dd856e7a322f3d4e5ccba16 Mon Sep 17 00:00:00 2001 From: hucc Date: Tue, 16 Apr 2019 16:04:09 +0800 Subject: [PATCH 5/8] common: CMap: slight optimization in Keys() and Values(). (#3567) --- libs/common/cmap.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/common/cmap.go b/libs/common/cmap.go index 2f7720d2e..d87adb762 100644 --- a/libs/common/cmap.go +++ b/libs/common/cmap.go @@ -56,7 +56,7 @@ func (cm *CMap) Clear() { func (cm *CMap) Keys() []string { cm.l.Lock() - keys := []string{} + keys := make([]string, 0, len(cm.m)) for k := range cm.m { keys = append(keys, k) } @@ -66,7 +66,7 @@ func (cm *CMap) Keys() []string { func (cm *CMap) Values() []interface{} { cm.l.Lock() - items := []interface{}{} + items := make([]interface{}, 0, len(cm.m)) for _, v := range cm.m { items = append(items, v) } From 3cb7013c3886f8144e8791908c9c07e3072b357c Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Tue, 16 Apr 2019 13:33:51 +0400 Subject: [PATCH 6/8] update changelog --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++---- CHANGELOG_PENDING.md | 3 +-- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 057b2e7be..161514778 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## v0.31.5 + +*April 16th, 2019* + +This release fixes a regression from v0.31.4 where, in existing chains that +were upgraded, `/validators` could return an empty validator set. This is true +for almost all heights given the validator set remains the same. + +Special thanks to external contributors on this release: +@brapse, @guagualvcha, @dongsam, @phucc + +### IMPROVEMENTS: + +- [libs/common] CMap: slight optimization in Keys() and Values() (@phucc) +- [gitignore] gitignore: add .vendor-new (@dongsam) + +### BUG FIXES: + +- [state] [\#3537](https://github.com/tendermint/tendermint/pull/3537#issuecomment-482711833) + LoadValidators: do not return an empty validator set +- [blockchain] [\#3457](https://github.com/tendermint/tendermint/issues/3457) + Fix "peer did not send us anything" in `fast_sync` mode when under high pressure + ## v0.31.4 *April 12th, 2019* @@ -9,13 +32,14 @@ the address book. This swallowed the peer's self-reported port which is importan It brings back `NetAddress()` to `NodeInfo` and uses it instead of `SocketAddr` for adding peers. Additionally, it improves response time on the `/validators` or `/status` RPC endpoints. As a side-effect it makes these RPC endpoint more difficult to DoS and fixes a performance degradation in `ExecCommitBlock`. -Also, it contains an [ADR](https://github.com/tendermint/tendermint/pull/3539) that proposes decoupling the -responsibility for peer behaviour from the `p2p.Switch` (by @brapse). +Also, it contains an [ADR](https://github.com/tendermint/tendermint/pull/3539) that proposes decoupling the +responsibility for peer behaviour from the `p2p.Switch` (by @brapse). Special thanks to external contributors on this release: @brapse, @guagualvcha, @mydring ### IMPROVEMENTS: + - [p2p] [\#3463](https://github.com/tendermint/tendermint/pull/3463) Do not log "Can't add peer's address to addrbook" error for a private peer - [p2p] [\#3547](https://github.com/tendermint/tendermint/pull/3547) Fix a couple of annoying typos (@mdyring) @@ -43,8 +67,8 @@ panic if the lookup failed. ### BREAKING CHANGES: * Go API - - [crypto/secp256k1] [\#3439](https://github.com/tendermint/tendermint/issues/3439) - The `secp256k1.GenPrivKeySecp256k1` function has changed to guarantee that it returns a valid key, which means it + - [crypto/secp256k1] [\#3439](https://github.com/tendermint/tendermint/issues/3439) + The `secp256k1.GenPrivKeySecp256k1` function has changed to guarantee that it returns a valid key, which means it will return a different private key than in previous versions for the same secret. ### BUG FIXES: diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 36c64be5e..fae9e6568 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -1,4 +1,4 @@ -## v0.31.5 +## v0.31.6 ** @@ -19,4 +19,3 @@ ### IMPROVEMENTS: ### BUG FIXES: -- [state] [\#3537](https://github.com/tendermint/tendermint/pull/3537#issuecomment-482711833) LoadValidators: do not return an empty validator set From 4474a5ec70fad39d78b28a543d72c25715bfca84 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Tue, 16 Apr 2019 13:34:01 +0400 Subject: [PATCH 7/8] bump version --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 9ba38de6c..a7559d499 100644 --- a/version/version.go +++ b/version/version.go @@ -20,7 +20,7 @@ const ( // Must be a string because scripts like dist.sh read this file. // XXX: Don't change the name of this variable or you will break // automation :) - TMCoreSemVer = "0.31.4" + TMCoreSemVer = "0.31.5" // ABCISemVer is the semantic version of the ABCI library ABCISemVer = "0.16.0" From 18bd5b627a99927877a92b50bee705aacf913b63 Mon Sep 17 00:00:00 2001 From: Ismail Khoffi Date: Tue, 16 Apr 2019 15:13:30 +0400 Subject: [PATCH 8/8] Apply suggestions from code review Co-Authored-By: melekes --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 161514778..9d498060b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,20 +6,20 @@ This release fixes a regression from v0.31.4 where, in existing chains that were upgraded, `/validators` could return an empty validator set. This is true -for almost all heights given the validator set remains the same. +for almost all heights, given the validator set remains the same. Special thanks to external contributors on this release: @brapse, @guagualvcha, @dongsam, @phucc ### IMPROVEMENTS: -- [libs/common] CMap: slight optimization in Keys() and Values() (@phucc) +- [libs/common] `CMap`: slight optimization in `Keys()` and `Values()` (@phucc) - [gitignore] gitignore: add .vendor-new (@dongsam) ### BUG FIXES: - [state] [\#3537](https://github.com/tendermint/tendermint/pull/3537#issuecomment-482711833) - LoadValidators: do not return an empty validator set + `LoadValidators`: do not return an empty validator set - [blockchain] [\#3457](https://github.com/tendermint/tendermint/issues/3457) Fix "peer did not send us anything" in `fast_sync` mode when under high pressure