From 5ef639fcbe6786fdeb99fd7754fa38a45655c84d Mon Sep 17 00:00:00 2001 From: Javed Khan Date: Mon, 2 Apr 2018 10:15:02 +0530 Subject: [PATCH 01/30] p2p: persistent - redial if first dial fails Fixes #1401 --- CHANGELOG.md | 1 + p2p/CHANGELOG.md | 1 + p2p/peer.go | 5 ++++- p2p/switch.go | 35 +++++++++++++++++++++-------------- p2p/switch_test.go | 29 +++++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aea9b5450..bad58654c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ IMPROVEMENTS: BUG FIXES: - Graceful handling/recovery for apps that have non-determinism or fail to halt - Graceful handling/recovery for violations of safety, or liveness +- Fix reconnect to persistent peer when first dial fails ## 0.17.1 (March 27th, 2018) diff --git a/p2p/CHANGELOG.md b/p2p/CHANGELOG.md index cae2f4c9f..260924a08 100644 --- a/p2p/CHANGELOG.md +++ b/p2p/CHANGELOG.md @@ -7,6 +7,7 @@ BREAKING CHANGES: - Remove or unexport methods from FuzzedConnection: Active, Mode, ProbDropRW, ProbDropConn, ProbSleep, MaxDelayMilliseconds, Fuzz - switch.AddPeerWithConnection is unexported and replaced by switch.AddPeer - switch.DialPeerWithAddress takes a bool, setting the peer as persistent or not +- PeerConfig requires a Dial function FEATURES: diff --git a/p2p/peer.go b/p2p/peer.go index 4af6eeaae..9e228efde 100644 --- a/p2p/peer.go +++ b/p2p/peer.go @@ -87,6 +87,8 @@ func newPeer(pc peerConn, nodeInfo NodeInfo, type PeerConfig struct { AuthEnc bool `mapstructure:"auth_enc"` // authenticated encryption + Dial func(addr *NetAddress, config *PeerConfig) (net.Conn, error) + // times are in seconds HandshakeTimeout time.Duration `mapstructure:"handshake_timeout"` DialTimeout time.Duration `mapstructure:"dial_timeout"` @@ -101,6 +103,7 @@ type PeerConfig struct { func DefaultPeerConfig() *PeerConfig { return &PeerConfig{ AuthEnc: true, + Dial: dial, HandshakeTimeout: 20, // * time.Second, DialTimeout: 3, // * time.Second, MConfig: tmconn.DefaultMConnConfig(), @@ -112,7 +115,7 @@ func DefaultPeerConfig() *PeerConfig { func newOutboundPeerConn(addr *NetAddress, config *PeerConfig, persistent bool, ourNodePrivKey crypto.PrivKey) (peerConn, error) { var pc peerConn - conn, err := dial(addr, config) + conn, err := config.Dial(addr, config) if err != nil { return pc, errors.Wrap(err, "Error creating peer") } diff --git a/p2p/switch.go b/p2p/switch.go index fa037a9b8..1a80d6435 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -56,6 +56,7 @@ type Switch struct { reactorsByCh map[byte]Reactor peers *PeerSet dialing *cmn.CMap + reconnecting *cmn.CMap nodeInfo NodeInfo // our node info nodeKey *NodeKey // our node privkey addrBook AddrBook @@ -75,6 +76,7 @@ func NewSwitch(config *cfg.P2PConfig) *Switch { reactorsByCh: make(map[byte]Reactor), peers: NewPeerSet(), dialing: cmn.NewCMap(), + reconnecting: cmn.NewCMap(), } // Ensure we have a completely undeterministic PRNG. cmd.RandInt64() draws @@ -255,7 +257,7 @@ func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { sw.stopAndRemovePeer(peer, reason) if peer.IsPersistent() { - go sw.reconnectToPeer(peer) + go sw.reconnectToPeer(peer.NodeInfo().NetAddress()) } } @@ -274,24 +276,28 @@ func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { } } -// reconnectToPeer tries to reconnect to the peer, first repeatedly +// reconnectToPeer tries to reconnect to the addr, first repeatedly // with a fixed interval, then with exponential backoff. // If no success after all that, it stops trying, and leaves it -// to the PEX/Addrbook to find the peer again -func (sw *Switch) reconnectToPeer(peer Peer) { - // NOTE this will connect to the self reported address, - // not necessarily the original we dialed - netAddr := peer.NodeInfo().NetAddress() +// to the PEX/Addrbook to find the peer with the addr again +func (sw *Switch) reconnectToPeer(addr *NetAddress) { + if sw.reconnecting.Has(string(addr.ID)) { + return + } + + sw.reconnecting.Set(string(addr.ID), addr) + defer sw.reconnecting.Delete(string(addr.ID)) + start := time.Now() - sw.Logger.Info("Reconnecting to peer", "peer", peer) + sw.Logger.Info("Reconnecting to peer", "addr", addr) for i := 0; i < reconnectAttempts; i++ { if !sw.IsRunning() { return } - err := sw.DialPeerWithAddress(netAddr, true) + err := sw.DialPeerWithAddress(addr, true) if err != nil { - sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer) + sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) // sleep a set amount sw.randomSleep(reconnectInterval) continue @@ -301,7 +307,7 @@ func (sw *Switch) reconnectToPeer(peer Peer) { } sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", - "peer", peer, "elapsed", time.Since(start)) + "addr", addr, "elapsed", time.Since(start)) for i := 0; i < reconnectBackOffAttempts; i++ { if !sw.IsRunning() { return @@ -310,13 +316,13 @@ func (sw *Switch) reconnectToPeer(peer Peer) { // sleep an exponentially increasing amount sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i)) sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second) - err := sw.DialPeerWithAddress(netAddr, true) + err := sw.DialPeerWithAddress(addr, true) if err == nil { return // success } - sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer) + sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) } - sw.Logger.Error("Failed to reconnect to peer. Giving up", "peer", peer, "elapsed", time.Since(start)) + sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start)) } // SetAddrBook allows to set address book on Switch. @@ -470,6 +476,7 @@ func (sw *Switch) addOutboundPeerWithConfig(addr *NetAddress, config *PeerConfig peerConn, err := newOutboundPeerConn(addr, config, persistent, sw.nodeKey.PrivKey) if err != nil { sw.Logger.Error("Failed to dial peer", "address", addr, "err", err) + go sw.reconnectToPeer(addr) return err } diff --git a/p2p/switch_test.go b/p2p/switch_test.go index 06e8b642e..5663ccea7 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/pkg/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -23,6 +24,11 @@ var ( config *cfg.P2PConfig ) +// badDial returns an error for testing dial errors +func badDial(addr *NetAddress, config *PeerConfig) (net.Conn, error) { + return nil, errors.New("dial err") +} + func init() { config = cfg.DefaultP2PConfig() config.PexReactor = true @@ -295,6 +301,29 @@ func TestSwitchReconnectsToPersistentPeer(t *testing.T) { } assert.NotZero(npeers) assert.False(peer.IsRunning()) + + // simulate another remote peer + rp = &remotePeer{PrivKey: crypto.GenPrivKeyEd25519().Wrap(), Config: DefaultPeerConfig()} + rp.Start() + defer rp.Stop() + + // simulate first time dial failure + peerConfig := DefaultPeerConfig() + peerConfig.Dial = badDial + err = sw.addOutboundPeerWithConfig(rp.Addr(), peerConfig, true) + require.NotNil(err) + + // DialPeerWithAddres - sw.peerConfig resets the dialer + + // TODO: same as above + for i := 0; i < 20; i++ { + time.Sleep(250 * time.Millisecond) + npeers = sw.Peers().Size() + if npeers > 1 { + break + } + } + assert.EqualValues(2, npeers) } func TestSwitchFullConnectivity(t *testing.T) { From 54adb790f2721363e92e9b59edea2f2354fc3e2e Mon Sep 17 00:00:00 2001 From: Javed Khan Date: Sat, 7 Apr 2018 11:31:56 +0530 Subject: [PATCH 02/30] p2p: switch - reconnect only if persistent --- p2p/switch.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/p2p/switch.go b/p2p/switch.go index 1a80d6435..21632213e 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -476,7 +476,9 @@ func (sw *Switch) addOutboundPeerWithConfig(addr *NetAddress, config *PeerConfig peerConn, err := newOutboundPeerConn(addr, config, persistent, sw.nodeKey.PrivKey) if err != nil { sw.Logger.Error("Failed to dial peer", "address", addr, "err", err) - go sw.reconnectToPeer(addr) + if persistent { + go sw.reconnectToPeer(addr) + } return err } From 5d8767e656176447f35070dc94064511db43a5e1 Mon Sep 17 00:00:00 2001 From: Javed Khan Date: Sat, 7 Apr 2018 12:51:51 +0530 Subject: [PATCH 03/30] p2p: don't use dial funcn in peerconfig --- p2p/peer.go | 9 ++++----- p2p/switch_test.go | 10 ++++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/p2p/peer.go b/p2p/peer.go index 9e228efde..db016c0a3 100644 --- a/p2p/peer.go +++ b/p2p/peer.go @@ -87,14 +87,13 @@ func newPeer(pc peerConn, nodeInfo NodeInfo, type PeerConfig struct { AuthEnc bool `mapstructure:"auth_enc"` // authenticated encryption - Dial func(addr *NetAddress, config *PeerConfig) (net.Conn, error) - // times are in seconds HandshakeTimeout time.Duration `mapstructure:"handshake_timeout"` DialTimeout time.Duration `mapstructure:"dial_timeout"` MConfig *tmconn.MConnConfig `mapstructure:"connection"` + Fail bool `mapstructure:"fail"` // for testing Fuzz bool `mapstructure:"fuzz"` // fuzz connection (for testing) FuzzConfig *FuzzConnConfig `mapstructure:"fuzz_config"` } @@ -103,10 +102,10 @@ type PeerConfig struct { func DefaultPeerConfig() *PeerConfig { return &PeerConfig{ AuthEnc: true, - Dial: dial, HandshakeTimeout: 20, // * time.Second, DialTimeout: 3, // * time.Second, MConfig: tmconn.DefaultMConnConfig(), + Fail: false, Fuzz: false, FuzzConfig: DefaultFuzzConnConfig(), } @@ -115,7 +114,7 @@ func DefaultPeerConfig() *PeerConfig { func newOutboundPeerConn(addr *NetAddress, config *PeerConfig, persistent bool, ourNodePrivKey crypto.PrivKey) (peerConn, error) { var pc peerConn - conn, err := config.Dial(addr, config) + conn, err := dial(addr, config) if err != nil { return pc, errors.Wrap(err, "Error creating peer") } @@ -347,7 +346,7 @@ func (p *peer) String() string { //------------------------------------------------------------------ // helper funcs -func dial(addr *NetAddress, config *PeerConfig) (net.Conn, error) { +var dial = func(addr *NetAddress, config *PeerConfig) (net.Conn, error) { conn, err := addr.DialTimeout(config.DialTimeout * time.Second) if err != nil { return nil, err diff --git a/p2p/switch_test.go b/p2p/switch_test.go index 5663ccea7..7feb4bda5 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -24,14 +24,20 @@ var ( config *cfg.P2PConfig ) +var goodDial = dial + // badDial returns an error for testing dial errors func badDial(addr *NetAddress, config *PeerConfig) (net.Conn, error) { - return nil, errors.New("dial err") + if config.Fail { + return nil, errors.New("dial err") + } + return goodDial(addr, config) } func init() { config = cfg.DefaultP2PConfig() config.PexReactor = true + dial = badDial } type PeerMessage struct { @@ -309,7 +315,7 @@ func TestSwitchReconnectsToPersistentPeer(t *testing.T) { // simulate first time dial failure peerConfig := DefaultPeerConfig() - peerConfig.Dial = badDial + peerConfig.Fail = true err = sw.addOutboundPeerWithConfig(rp.Addr(), peerConfig, true) require.NotNil(err) From d92def4b6099ef864c2453c3402f6c1ffaac5de8 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 00:06:05 -0400 Subject: [PATCH 04/30] version bump --- version/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version/version.go b/version/version.go index 58e3f5e79..31479b32b 100644 --- a/version/version.go +++ b/version/version.go @@ -4,13 +4,13 @@ package version const ( Maj = "0" Min = "19" - Fix = "1" + Fix = "2" ) var ( // Version is the current version of Tendermint // Must be a string because scripts like dist.sh read this file. - Version = "0.19.1" + Version = "0.19.2-dev" // GitCommit is the current HEAD set using ldflags. GitCommit string From 0cbbb61962df84d1367c3f7da6e93638ae065819 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 01:02:39 -0400 Subject: [PATCH 05/30] minor cleanup --- CHANGELOG.md | 5 +++ p2p/CHANGELOG.md | 79 ---------------------------------------------- p2p/peer.go | 12 ++++--- p2p/switch.go | 8 +++-- p2p/switch_test.go | 16 ++-------- 5 files changed, 20 insertions(+), 100 deletions(-) delete mode 100644 p2p/CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bfae341c..c2bdb3f18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,11 @@ IMPROVEMENTS: BUG FIXES: - Graceful handling/recovery for apps that have non-determinism or fail to halt - Graceful handling/recovery for violations of safety, or liveness + +## 0.19.2 (TBD) + +BUG FIXES: + - Fix reconnect to persistent peer when first dial fails ## 0.19.1 (April 27th, 2018) diff --git a/p2p/CHANGELOG.md b/p2p/CHANGELOG.md deleted file mode 100644 index 260924a08..000000000 --- a/p2p/CHANGELOG.md +++ /dev/null @@ -1,79 +0,0 @@ -# Changelog - -## 0.5.0 (April 21, 2017) - -BREAKING CHANGES: - -- Remove or unexport methods from FuzzedConnection: Active, Mode, ProbDropRW, ProbDropConn, ProbSleep, MaxDelayMilliseconds, Fuzz -- switch.AddPeerWithConnection is unexported and replaced by switch.AddPeer -- switch.DialPeerWithAddress takes a bool, setting the peer as persistent or not -- PeerConfig requires a Dial function - -FEATURES: - -- Persistent peers: any peer considered a "seed" will be reconnected to when the connection is dropped - - -IMPROVEMENTS: - -- Many more tests and comments -- Refactor configurations for less dependence on go-config. Introduces new structs PeerConfig, MConnConfig, FuzzConnConfig -- New methods on peer: CloseConn, HandshakeTimeout, IsPersistent, Addr, PubKey -- NewNetAddress supports a testing mode where the address defaults to 0.0.0.0:0 - - -## 0.4.0 (March 6, 2017) - -BREAKING CHANGES: - -- DialSeeds now takes an AddrBook and returns an error: `DialSeeds(*AddrBook, []string) error` -- NewNetAddressString now returns an error: `NewNetAddressString(string) (*NetAddress, error)` - -FEATURES: - -- `NewNetAddressStrings([]string) ([]*NetAddress, error)` -- `AddrBook.Save()` - -IMPROVEMENTS: - -- PexReactor responsible for starting and stopping the AddrBook - -BUG FIXES: - -- DialSeeds returns an error instead of panicking on bad addresses - -## 0.3.5 (January 12, 2017) - -FEATURES - -- Toggle strict routability in the AddrBook - -BUG FIXES - -- Close filtered out connections -- Fixes for MakeConnectedSwitches and Connect2Switches - -## 0.3.4 (August 10, 2016) - -FEATURES: - -- Optionally filter connections by address or public key - -## 0.3.3 (May 12, 2016) - -FEATURES: - -- FuzzConn - -## 0.3.2 (March 12, 2016) - -IMPROVEMENTS: - -- Memory optimizations - -## 0.3.1 () - -FEATURES: - -- Configurable parameters - diff --git a/p2p/peer.go b/p2p/peer.go index 9008180fb..b9c8f8b41 100644 --- a/p2p/peer.go +++ b/p2p/peer.go @@ -90,8 +90,8 @@ type PeerConfig struct { MConfig *tmconn.MConnConfig `mapstructure:"connection"` - Fail bool `mapstructure:"fail"` // for testing - Fuzz bool `mapstructure:"fuzz"` // fuzz connection (for testing) + DialFail bool `mapstructure:"dial_fail"` // for testing + Fuzz bool `mapstructure:"fuzz"` // fuzz connection (for testing) FuzzConfig *FuzzConnConfig `mapstructure:"fuzz_config"` } @@ -102,7 +102,7 @@ func DefaultPeerConfig() *PeerConfig { HandshakeTimeout: 20, // * time.Second, DialTimeout: 3, // * time.Second, MConfig: tmconn.DefaultMConnConfig(), - Fail: false, + DialFail: false, Fuzz: false, FuzzConfig: DefaultFuzzConnConfig(), } @@ -339,7 +339,11 @@ func (p *peer) String() string { //------------------------------------------------------------------ // helper funcs -var dial = func(addr *NetAddress, config *PeerConfig) (net.Conn, error) { +func dial(addr *NetAddress, config *PeerConfig) (net.Conn, error) { + if config.DialFail { + return nil, fmt.Errorf("dial err (peerConfig.DialFail == true)") + } + conn, err := addr.DialTimeout(config.DialTimeout * time.Second) if err != nil { return nil, err diff --git a/p2p/switch.go b/p2p/switch.go index f475344be..b47b967f9 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -283,11 +283,12 @@ func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { // with a fixed interval, then with exponential backoff. // If no success after all that, it stops trying, and leaves it // to the PEX/Addrbook to find the peer with the addr again +// NOTE: this will keep trying even if the handshake or auth fails. +// TODO: be more explicit with error types so we only retry on certain failures func (sw *Switch) reconnectToPeer(addr *NetAddress) { if sw.reconnecting.Has(string(addr.ID)) { return } - sw.reconnecting.Set(string(addr.ID), addr) defer sw.reconnecting.Delete(string(addr.ID)) @@ -381,13 +382,14 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b go func(i int) { j := perm[i] + addr := netAddrs[j] // do not dial ourselves - if netAddrs[j].Same(ourAddr) { + if addr.Same(ourAddr) { return } sw.randomSleep(0) - err := sw.DialPeerWithAddress(netAddrs[j], persistent) + err := sw.DialPeerWithAddress(addr, persistent) if err != nil { sw.Logger.Error("Error dialing peer", "err", err) } diff --git a/p2p/switch_test.go b/p2p/switch_test.go index c1f6eae61..25ed73bce 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -8,7 +8,6 @@ import ( "testing" "time" - "github.com/pkg/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -23,20 +22,9 @@ var ( config *cfg.P2PConfig ) -var goodDial = dial - -// badDial returns an error for testing dial errors -func badDial(addr *NetAddress, config *PeerConfig) (net.Conn, error) { - if config.Fail { - return nil, errors.New("dial err") - } - return goodDial(addr, config) -} - func init() { config = cfg.DefaultP2PConfig() config.PexReactor = true - dial = badDial } type PeerMessage struct { @@ -329,13 +317,13 @@ func TestSwitchReconnectsToPersistentPeer(t *testing.T) { assert.False(peer.IsRunning()) // simulate another remote peer - rp = &remotePeer{PrivKey: crypto.GenPrivKeyEd25519().Wrap(), Config: DefaultPeerConfig()} + rp = &remotePeer{PrivKey: crypto.GenPrivKeyEd25519(), Config: DefaultPeerConfig()} rp.Start() defer rp.Stop() // simulate first time dial failure peerConfig := DefaultPeerConfig() - peerConfig.Fail = true + peerConfig.DialFail = true err = sw.addOutboundPeerWithConfig(rp.Addr(), peerConfig, true) require.NotNil(err) From 936d1a0e681a060969db26edb29ed55605565a1b Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 20 Mar 2018 21:41:08 +0100 Subject: [PATCH 06/30] some notes about the p2p layer --- .../new-spec/reactors/pex/pex.md | 27 +++++++-- node/node.go | 42 +++++++++----- p2p/netaddress.go | 1 + p2p/pex/pex_reactor.go | 57 +++++++++++++++---- p2p/switch.go | 6 +- rpc/core/net.go | 2 + 6 files changed, 105 insertions(+), 30 deletions(-) diff --git a/docs/specification/new-spec/reactors/pex/pex.md b/docs/specification/new-spec/reactors/pex/pex.md index d3981bda2..29d65d17d 100644 --- a/docs/specification/new-spec/reactors/pex/pex.md +++ b/docs/specification/new-spec/reactors/pex/pex.md @@ -7,7 +7,8 @@ to good peers and to gossip peers to others. ## Peer Types Certain peers are special in that they are specified by the user as `persistent`, -which means we auto-redial them if the connection fails. +which means we auto-redial them if the connection fails, or if we fail to dial +them. Some peers can be marked as `private`, which means we will not put them in the address book or gossip them to others. @@ -19,6 +20,11 @@ Peer discovery begins with a list of seeds. When we have no peers, or have been unable to find enough peers from existing ones, we dial a randomly selected seed to get a list of peers to dial. +On startup, we will also immediately dial the given list of `persisten_peers`, +and will attempt to maintain persistent connections with them. If the connections die, +we will redial every 5s for a a few minutes, then switch to an exponential backoff schedule, +and after about a day of trying, stop dialing the peer. + So long as we have less than `MaxPeers`, we periodically request additional peers from each of our own. If sufficient time goes by and we still can't find enough peers, we try the seeds again. @@ -68,6 +74,19 @@ Connection attempts are made with exponential backoff (plus jitter). Because the selection process happens every `ensurePeersPeriod`, we might not end up dialing a peer for much longer than the backoff duration. +....... +----------------- +TODO: put this in the right place. And add more details. + +AddrBook: If there's no space in the book, we check the bucket for bad peers, which include peers we've attempted to +dial 3 times, and then remove them from only that bucket. + +PEX: if we fail to conenct to the peer after 16 tries (with exponential backoff), we remove from address book completely. + +----------------- +....... + + ## Select Peers to Exchange When we’re asked for peers, we select them as follows: @@ -86,9 +105,9 @@ Note that the bad behaviour may be detected outside the PEX reactor itself (for instance, in the mconnection, or another reactor), but it must be communicated to the PEX reactor so it can remove and mark the peer. -In the PEX, if a peer sends us unsolicited lists of peers, -or if the peer sends too many requests for more peers in a given amount of time, -we Disconnect and Mark. +In the PEX, if a peer sends us an unsolicited list of peers, +or if the peer sends a request too soon after another one, +we Disconnect and MarkBad. ## Trust Metric diff --git a/node/node.go b/node/node.go index 205e672f3..00d760664 100644 --- a/node/node.go +++ b/node/node.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "net/http" + "strings" abci "github.com/tendermint/abci/types" amino "github.com/tendermint/go-amino" @@ -21,7 +22,6 @@ import ( mempl "github.com/tendermint/tendermint/mempool" "github.com/tendermint/tendermint/p2p" "github.com/tendermint/tendermint/p2p/pex" - "github.com/tendermint/tendermint/p2p/trust" "github.com/tendermint/tendermint/proxy" rpccore "github.com/tendermint/tendermint/rpc/core" ctypes "github.com/tendermint/tendermint/rpc/core/types" @@ -99,9 +99,9 @@ type Node struct { privValidator types.PrivValidator // local node's validator key // network - sw *p2p.Switch // p2p connections - addrBook pex.AddrBook // known peers - trustMetricStore *trust.TrustMetricStore // trust metrics for all peers + sw *p2p.Switch // p2p connections + addrBook pex.AddrBook // known peers + // trustMetricStore *trust.TrustMetricStore // trust metrics for all peers // services eventBus *types.EventBus // pub/sub for services @@ -262,20 +262,33 @@ func NewNode(config *cfg.Config, sw.AddReactor("EVIDENCE", evidenceReactor) // Optionally, start the pex reactor + // + // TODO: + // + // We need to set Seeds and PersistentPeers on the switch, + // since it needs to be able to use these (and their DNS names) + // even if the PEX is off. We can include the DNS name in the NetAddress, + // but it would still be nice to have a clear list of the current "PersistentPeers" + // somewhere that we can return with net_info. + // + // Let's assume we always have IDs ... and we just dont authenticate them + // if auth_enc=false. + // + // If PEX is on, it should handle dialing the seeds. Otherwise the switch does it. var addrBook pex.AddrBook - var trustMetricStore *trust.TrustMetricStore if config.P2P.PexReactor { addrBook = pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) - // Get the trust metric history data - trustHistoryDB, err := dbProvider(&DBContext{"trusthistory", config}) - if err != nil { - return nil, err + var seeds []string + if config.P2P.Seeds != "" { + seeds = strings.Split(config.P2P.Seeds, ",") } - trustMetricStore = trust.NewTrustMetricStore(trustHistoryDB, trust.DefaultConfig()) - trustMetricStore.SetLogger(p2pLogger) - + var privatePeerIDs []string + if config.P2P.PrivatePeerIDs != "" { + privatePeerIDs = strings.Split(config.P2P.PrivatePeerIDs, ",") + } + // TODO persistent peers ? so we can have their DNS addrs saved pexReactor := pex.NewPEXReactor(addrBook, &pex.PEXReactorConfig{ Seeds: cmn.SplitAndTrim(config.P2P.Seeds, ",", " "), @@ -355,9 +368,8 @@ func NewNode(config *cfg.Config, genesisDoc: genDoc, privValidator: privValidator, - sw: sw, - addrBook: addrBook, - trustMetricStore: trustMetricStore, + sw: sw, + addrBook: addrBook, stateDB: stateDB, blockStore: blockStore, diff --git a/p2p/netaddress.go b/p2p/netaddress.go index 6b88f5670..6cf86f22e 100644 --- a/p2p/netaddress.go +++ b/p2p/netaddress.go @@ -22,6 +22,7 @@ type NetAddress struct { ID ID IP net.IP Port uint16 + Name string // optional DNS name str string } diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 70ae7ed11..7897a5534 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -20,7 +20,10 @@ const ( // PexChannel is a channel for PEX messages PexChannel = byte(0x00) - maxMsgSize = 1048576 // 1MB + // TODO: make smaller. Should match the maxGetSelection + // this is basically the amplification factor since a request + // msg is like 1 byte ... it can cause us to send msgs of this size! + maxPexMessageSize = 1048576 // 1MB // ensure we have enough peers defaultEnsurePeersPeriod = 30 * time.Second @@ -61,7 +64,7 @@ type PEXReactor struct { book AddrBook config *PEXReactorConfig - ensurePeersPeriod time.Duration + ensurePeersPeriod time.Duration // TODO: should go in the config // maps to prevent abuse requestsSent *cmn.CMap // ID->struct{}: unanswered send requests @@ -70,6 +73,12 @@ type PEXReactor struct { attemptsToDial sync.Map // address (string) -> {number of attempts (int), last time dialed (time.Time)} } +func (pexR *PEXReactor) minReceiveRequestInterval() time.Duration { + // NOTE: must be less than ensurePeersPeriod, otherwise we'll request + // peers too quickly from others and they'll think we're bad! + return pexR.ensurePeersPeriod / 3 +} + // PEXReactorConfig holds reactor specific configuration data. type PEXReactorConfig struct { // Seed/Crawler mode @@ -113,6 +122,9 @@ func (r *PEXReactor) OnStart() error { } // return err if user provided a bad seed address + // NOTE: only if its an invalid address. + // If we simply fail to resovle a DNS name, + // we shouldn't exit here ... if err := r.checkSeeds(); err != nil { return err } @@ -195,6 +207,10 @@ func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) { } // Seeds disconnect after sending a batch of addrs + // NOTE: this is a prime candidate for amplification attacks + // so it's important we + // 1) restrict how frequently peers can request + // 2) limit the output size if r.config.SeedMode { r.SendAddrs(src, r.book.GetSelectionWithBias(biasToSelectNewPeers)) r.Switch.StopPeerGracefully(src) @@ -213,6 +229,7 @@ func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) { } } +// enforces a minimum amount of time between requests func (r *PEXReactor) receiveRequest(src Peer) error { id := string(src.ID()) v := r.lastReceivedRequests.Get(id) @@ -232,8 +249,14 @@ func (r *PEXReactor) receiveRequest(src Peer) error { } now := time.Now() - if now.Sub(lastReceived) < r.ensurePeersPeriod/3 { - return fmt.Errorf("Peer (%v) is sending too many PEX requests. Disconnecting", src.ID()) + minInterval := r.minReceiveRequestInterval() + if now.Sub(lastReceived) < minInterval { + return fmt.Errorf("Peer (%v) send next PEX request too soon. lastReceived: %v, now: %v, minInterval: %v. Disconnecting", + src.ID(), + lastReceived, + now, + minInterval, + ) } r.lastReceivedRequests.Set(id, now) return nil @@ -264,7 +287,11 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { srcAddr := src.NodeInfo().NetAddress() for _, netAddr := range addrs { + // TODO: make sure correct nodes never send nil and return error + // if a netAddr == nil if netAddr != nil && !isAddrPrivate(netAddr, r.config.PrivatePeerIDs) { + // TODO: Should we moe the list of private peers into the AddrBook so AddAddress + // can do the check for us, and we don't have to worry about checking before calling ? err := r.book.AddAddress(netAddr, srcAddr) if err != nil { r.Logger.Error("Failed to add new address", "err", err) @@ -360,6 +387,9 @@ func (r *PEXReactor) ensurePeers() { if connected := r.Switch.Peers().Has(try.ID); connected { continue } + // TODO: consider moving some checks from toDial into here + // so we don't even consider dialing peers that we want to wait + // before dialling again, or have dialled too many times already r.Logger.Info("Will dial address", "addr", try) toDial[try.ID] = try } @@ -387,13 +417,17 @@ func (r *PEXReactor) ensurePeers() { } } -func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) { - var attempts int - var lastDialed time.Time - if lAttempts, attempted := r.attemptsToDial.Load(addr.DialString()); attempted { - attempts = lAttempts.(_attemptsToDial).number - lastDialed = lAttempts.(_attemptsToDial).lastDialed +func (r *PEXReactor) dialAttemptsInfo(addr *p2p.NetAddress) (attempts int, lastDialed time.Time) { + _attempts, ok := r.attemptsToDial.Load(addr.DialString()) + if !ok { + return } + atd := _attempts.(_attemptsToDial) + return atd.number, atd.lastDialed +} + +func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) { + attempts, lastDialed := r.dialAttemptsInfo(addr) if attempts > maxAttemptsToDial { r.Logger.Error("Reached max attempts to dial", "addr", addr, "attempts", attempts) @@ -439,6 +473,9 @@ func (r *PEXReactor) checkSeeds() error { if lSeeds == 0 { return nil } + // TODO: don't exit the program if we simply cant resolve a DNS name. + // But if names or addresses are incorrectly speficied (ie. invalid), + // then we should return an err that causes an exit _, errs := p2p.NewNetAddressStrings(r.config.Seeds) for _, err := range errs { if err != nil { diff --git a/p2p/switch.go b/p2p/switch.go index b47b967f9..598fac468 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -351,6 +351,7 @@ func (sw *Switch) IsDialing(id ID) bool { } // DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent). +// TODO: remove addrBook arg since it's now set on the switch func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent bool) error { netAddrs, errs := NewNetAddressStrings(peers) // only log errors, dial correct addresses @@ -360,7 +361,10 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b ourAddr := sw.nodeInfo.NetAddress() - // TODO: move this out of here ? + // TODO: this code feels like it's in the wrong place. + // The integration tests depend on the addrBook being saved + // right away but maybe we can change that. Recall that + // the addrBook is only written to disk every 2min if addrBook != nil { // add peers to `addrBook` for _, netAddr := range netAddrs { diff --git a/rpc/core/net.go b/rpc/core/net.go index 9b04926ab..d3123b9eb 100644 --- a/rpc/core/net.go +++ b/rpc/core/net.go @@ -47,6 +47,8 @@ func NetInfo() (*ctypes.ResultNetInfo, error) { ConnectionStatus: peer.Status(), }) } + // TODO: should we include "num_peers" field for convenience ? + // Let's also include the PersistentPeers and Seeds in here. return &ctypes.ResultNetInfo{ Listening: listening, Listeners: listeners, From c23909eecf14b7ebb2fe66f70c3c2397aeab15fe Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 13:08:44 -0400 Subject: [PATCH 07/30] p2p/pex: minor cleanup and comments --- p2p/pex/addrbook.go | 47 ++++++++++++++++++++++--------------- p2p/pex/pex_reactor.go | 53 ++++++++++++++++++++++++++++-------------- p2p/switch.go | 2 ++ 3 files changed, 65 insertions(+), 37 deletions(-) diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index 56a4c5163..08646ae76 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -7,7 +7,6 @@ package pex import ( "crypto/sha256" "encoding/binary" - "fmt" "math" "net" "sync" @@ -169,7 +168,9 @@ func (a *addrBook) OurAddress(addr *p2p.NetAddress) bool { return ok } -// AddAddress implements AddrBook - adds the given address as received from the given source. +// AddAddress implements AddrBook +// Add address to a "new" bucket. If it's already in one, only add it probabilistically. +// Returns error if the addr is non-routable. Does not add self. // NOTE: addr must not be nil func (a *addrBook) AddAddress(addr *p2p.NetAddress, src *p2p.NetAddress) error { a.mtx.Lock() @@ -298,25 +299,27 @@ func (a *addrBook) GetSelection() []*p2p.NetAddress { a.mtx.Lock() defer a.mtx.Unlock() - if a.size() == 0 { + bookSize := a.size() + if bookSize == 0 { return nil } - allAddr := make([]*p2p.NetAddress, a.size()) + numAddresses := cmn.MaxInt( + cmn.MinInt(minGetSelection, bookSize), + bookSize*getSelectionPercent/100) + numAddresses = cmn.MinInt(maxGetSelection, numAddresses) + + // XXX: instead of making a list of all addresses, shuffling, and slicing a random chunk, + // could we just select a random numAddresses of indexes? + allAddr := make([]*p2p.NetAddress, bookSize) i := 0 for _, ka := range a.addrLookup { allAddr[i] = ka.Addr i++ } - numAddresses := cmn.MaxInt( - cmn.MinInt(minGetSelection, len(allAddr)), - len(allAddr)*getSelectionPercent/100) - numAddresses = cmn.MinInt(maxGetSelection, numAddresses) - // Fisher-Yates shuffle the array. We only need to do the first // `numAddresses' since we are throwing the rest. - // XXX: What's the point of this if we already loop randomly through addrLookup ? for i := 0; i < numAddresses; i++ { // pick a number between current index and the end j := cmn.RandIntn(len(allAddr)-i) + i @@ -338,7 +341,8 @@ func (a *addrBook) GetSelectionWithBias(biasTowardsNewAddrs int) []*p2p.NetAddre a.mtx.Lock() defer a.mtx.Unlock() - if a.size() == 0 { + bookSize := a.size() + if bookSize == 0 { return nil } @@ -350,8 +354,8 @@ func (a *addrBook) GetSelectionWithBias(biasTowardsNewAddrs int) []*p2p.NetAddre } numAddresses := cmn.MaxInt( - cmn.MinInt(minGetSelection, a.size()), - a.size()*getSelectionPercent/100) + cmn.MinInt(minGetSelection, bookSize), + bookSize*getSelectionPercent/100) numAddresses = cmn.MinInt(maxGetSelection, numAddresses) selection := make([]*p2p.NetAddress, numAddresses) @@ -605,12 +609,19 @@ func (a *addrBook) pickOldest(bucketType byte, bucketIdx int) *knownAddress { // adds the address to a "new" bucket. if its already in one, // it only adds it probabilistically func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { + if addr == nil { + return ErrAddrBookNilAddr{addr, src} + } + if src == nil { + return ErrAddrBookNilAddr{addr, src} + } + if a.routabilityStrict && !addr.Routable() { - return fmt.Errorf("Cannot add non-routable address %v", addr) + return ErrAddrBookNonRoutable{addr} } + // TODO: we should track ourAddrs by ID and by IP:PORT and refuse both. if _, ok := a.ourAddrs[addr.String()]; ok { - // Ignore our own listener address. - return fmt.Errorf("Cannot add ourselves with address %v", addr) + return ErrAddrBookSelf } ka := a.addrLookup[addr.ID] @@ -636,10 +647,8 @@ func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { bucket := a.calcNewBucket(addr, src) added := a.addToNewBucket(ka, bucket) if !added { - a.Logger.Info("Can't add new address, addr book is full", "address", addr, "total", a.size()) + return ErrAddrBookFull{addr, a.size()} } - - a.Logger.Info("Added new address", "address", addr, "total", a.size()) return nil } diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 7897a5534..c6ae4f22d 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -167,17 +167,28 @@ func (r *PEXReactor) AddPeer(p Peer) { r.RequestAddrs(p) } } else { - // For inbound peers, the peer is its own source, - // and its NodeInfo has already been validated. - // Let the ensurePeersRoutine handle asking for more - // peers when we need - we don't trust inbound peers as much. + // inbound peer is its own source addr := p.NodeInfo().NetAddress() - if !isAddrPrivate(addr, r.config.PrivatePeerIDs) { - err := r.book.AddAddress(addr, addr) - if err != nil { + src := addr + + // ignore private addrs + if isAddrPrivate(addr, r.config.PrivatePeerIDs) { + return + } + + // add to book. dont RequestAddrs right away because + // we don't trust inbound as much - let ensurePeersRoutine handle it. + err := r.book.AddAddress(addr, src) + if err != nil { + switch err.(type) { + case ErrAddrBookNilAddr: r.Logger.Error("Failed to add new address", "err", err) + default: + // non-routable, self, full book, etc. + r.Logger.Debug("Failed to add new address", "err", err) } } + } } @@ -277,25 +288,31 @@ func (r *PEXReactor) RequestAddrs(p Peer) { // request for this peer and deletes the open request. // If there's no open request for the src peer, it returns an error. func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { - id := string(src.ID()) + id := string(src.ID()) if !r.requestsSent.Has(id) { return cmn.NewError("Received unsolicited pexAddrsMessage") } - r.requestsSent.Delete(id) srcAddr := src.NodeInfo().NetAddress() for _, netAddr := range addrs { // TODO: make sure correct nodes never send nil and return error - // if a netAddr == nil - if netAddr != nil && !isAddrPrivate(netAddr, r.config.PrivatePeerIDs) { - // TODO: Should we moe the list of private peers into the AddrBook so AddAddress - // can do the check for us, and we don't have to worry about checking before calling ? - err := r.book.AddAddress(netAddr, srcAddr) - if err != nil { - r.Logger.Error("Failed to add new address", "err", err) - } + if netAddr == nil { + return cmn.NewError("received nil addr") + } + + // ignore private peers + // TODO: give private peers to AddrBook so it can enforce this on AddAddress. + // We'd then have to check for ErrPrivatePeer on AddAddress here, which is + // an error we just ignore (maybe peer is probing us for our private peers :P) + if isAddrPrivate(netAddr, r.config.PrivatePeerIDs) { + continue + } + + err := r.book.AddAddress(netAddr, srcAddr) + if err != nil { + r.Logger.Error("Failed to add new address", "err", err) } } return nil @@ -627,7 +644,7 @@ func (r *PEXReactor) attemptDisconnects() { } } -// isAddrPrivate returns true if addr is private. +// isAddrPrivate returns true if addr.ID is a private ID. func isAddrPrivate(addr *p2p.NetAddress, privatePeerIDs []string) bool { for _, id := range privatePeerIDs { if string(addr.ID) == id { diff --git a/p2p/switch.go b/p2p/switch.go index 598fac468..7a06a69f0 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -260,6 +260,7 @@ func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { sw.stopAndRemovePeer(peer, reason) if peer.IsPersistent() { + // NOTE: this is the self-reported addr, not the original we dialed go sw.reconnectToPeer(peer.NodeInfo().NetAddress()) } } @@ -351,6 +352,7 @@ func (sw *Switch) IsDialing(id ID) bool { } // DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent). +// Used to dial peers from config on startup or from unsafe-RPC (trusted sources). // TODO: remove addrBook arg since it's now set on the switch func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent bool) error { netAddrs, errs := NewNetAddressStrings(peers) From 40c79235c0829f8654644a6852e13fc040d06754 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 13:09:17 -0400 Subject: [PATCH 08/30] p2p: dont require minor versions to match in handshake --- p2p/node_info.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/p2p/node_info.go b/p2p/node_info.go index e46236188..73af1a4af 100644 --- a/p2p/node_info.go +++ b/p2p/node_info.go @@ -56,7 +56,7 @@ func (info NodeInfo) Validate() error { } // CompatibleWith checks if two NodeInfo are compatible with eachother. -// CONTRACT: two nodes are compatible if the major/minor versions match and network match +// CONTRACT: two nodes are compatible if the major version matches and network match // and they have at least one channel in common. func (info NodeInfo) CompatibleWith(other NodeInfo) error { iMajor, iMinor, _, iErr := splitVersion(info.Version) @@ -77,9 +77,9 @@ func (info NodeInfo) CompatibleWith(other NodeInfo) error { return fmt.Errorf("Peer is on a different major version. Got %v, expected %v", oMajor, iMajor) } - // minor version must match + // minor version can differ if iMinor != oMinor { - return fmt.Errorf("Peer is on a different minor version. Got %v, expected %v", oMinor, iMinor) + // ok } // nodes must be on the same network From f645187122e3cd6f3aa3dcfbe81ce327459d62b8 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 13:10:27 -0400 Subject: [PATCH 09/30] spec: pex update --- .../new-spec/reactors/pex/pex.md | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/docs/specification/new-spec/reactors/pex/pex.md b/docs/specification/new-spec/reactors/pex/pex.md index 29d65d17d..0173704e2 100644 --- a/docs/specification/new-spec/reactors/pex/pex.md +++ b/docs/specification/new-spec/reactors/pex/pex.md @@ -21,14 +21,20 @@ When we have no peers, or have been unable to find enough peers from existing on we dial a randomly selected seed to get a list of peers to dial. On startup, we will also immediately dial the given list of `persisten_peers`, -and will attempt to maintain persistent connections with them. If the connections die, +and will attempt to maintain persistent connections with them. If the connections die, or we fail to dail, we will redial every 5s for a a few minutes, then switch to an exponential backoff schedule, and after about a day of trying, stop dialing the peer. -So long as we have less than `MaxPeers`, we periodically request additional peers +So long as we have less than `MinNumOutboundPeers`, we periodically request additional peers from each of our own. If sufficient time goes by and we still can't find enough peers, we try the seeds again. +## Listening + +Peers listen on a configurable ListenAddr that they self-report during +handshakes with other peers. + + ## Address Book Peers are tracked via their ID (their PubKey.Address()). @@ -42,6 +48,9 @@ unvetted peers. Buckets provide randomization over peer selection. A vetted peer can only be in one bucket. An unvetted peer can be in multiple buckets. +If there's no space in the book, we check the bucket for bad peers, which include peers we've attempted to +dial 3 times, and then remove them from only that bucket. + ## Vetting When a peer is first added, it is unvetted. @@ -74,18 +83,10 @@ Connection attempts are made with exponential backoff (plus jitter). Because the selection process happens every `ensurePeersPeriod`, we might not end up dialing a peer for much longer than the backoff duration. -....... ------------------ -TODO: put this in the right place. And add more details. - -AddrBook: If there's no space in the book, we check the bucket for bad peers, which include peers we've attempted to -dial 3 times, and then remove them from only that bucket. +TODO PEX: if we fail to conenct to the peer after 16 tries (with exponential backoff), we remove from address book completely. ------------------ -....... - ## Select Peers to Exchange From 32268a8135f04e087e0478da32e9f61e6fb95ea9 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 14:41:36 -0400 Subject: [PATCH 10/30] limit maxPexMessageSize based on maxAddressSize --- p2p/pex/pex_reactor.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index c6ae4f22d..dfe54e0d6 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -20,10 +20,14 @@ const ( // PexChannel is a channel for PEX messages PexChannel = byte(0x00) - // TODO: make smaller. Should match the maxGetSelection - // this is basically the amplification factor since a request - // msg is like 1 byte ... it can cause us to send msgs of this size! - maxPexMessageSize = 1048576 // 1MB + // over-estimate of max NetAddress size + // hexID (40) + IP (16) + Port (2) + Name (100) ... + // NOTE: dont use massive DNS name .. + maxAddressSize = 256 + + // NOTE: amplificaiton factor! + // small request results in up to maxPexMessageSize response + maxPexMessageSize = maxAddressSize * maxGetSelection // ensure we have enough peers defaultEnsurePeersPeriod = 30 * time.Second From 3ee1d7909ead44cd47c7fd5b1475a7c27b0e606a Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 14:48:51 -0400 Subject: [PATCH 11/30] p2p: explicit netaddress errors --- p2p/errors.go | 28 ++++++++++++++++++++++++++++ p2p/netaddress.go | 30 +++++++++++++++++------------- p2p/node_info.go | 21 +++++++++++++++++++-- 3 files changed, 64 insertions(+), 15 deletions(-) diff --git a/p2p/errors.go b/p2p/errors.go index cb6a7051a..91b8f96c2 100644 --- a/p2p/errors.go +++ b/p2p/errors.go @@ -18,3 +18,31 @@ type ErrSwitchAuthenticationFailure struct { func (e ErrSwitchAuthenticationFailure) Error() string { return fmt.Sprintf("Failed to authenticate peer. Dialed %v, but got peer with ID %s", e.Dialed, e.Got) } + +//------------------------------------------------------------------- + +type ErrNetAddressNoID struct { + Addr string +} + +func (e ErrNetAddressNoID) Error() string { + return fmt.Errorf("Address (%s) does not contain ID", e.Addr) +} + +type ErrNetAddressInvalid struct { + Addr string + Err error +} + +func (e ErrNetAddressInvalid) Error() string { + return fmt.Errorf("Invalid address (%s): %v", e.Addr, e.Err) +} + +type ErrNetAddressLookup struct { + Addr string + Err error +} + +func (e ErrNetAddressLookup) Error() string { + return fmt.Errorf("Error looking up host (%s): %v", e.Addr, e.Err) +} diff --git a/p2p/netaddress.go b/p2p/netaddress.go index 6cf86f22e..80b8e18bb 100644 --- a/p2p/netaddress.go +++ b/p2p/netaddress.go @@ -19,11 +19,13 @@ import ( // NetAddress defines information about a peer on the network // including its ID, IP address, and port. type NetAddress struct { - ID ID - IP net.IP - Port uint16 - Name string // optional DNS name - str string + ID ID `json:"id"` + IP net.IP `json:"ip"` + Port uint16 `json:"port"` + Name string `json:"name"` // optional DNS name + + // memoize .String() + str string } // IDAddressString returns id@hostPort. @@ -57,10 +59,11 @@ func NewNetAddress(id ID, addr net.Addr) *NetAddress { // NewNetAddressString returns a new NetAddress using the provided address in // the form of "ID@IP:Port". // Also resolves the host if host is not an IP. +// Errors are of type ErrNetAddressXxx where Xxx is in (NoID, Invalid, Lookup) func NewNetAddressString(addr string) (*NetAddress, error) { spl := strings.Split(addr, "@") if len(spl) < 2 { - return nil, fmt.Errorf("Address (%s) does not contain ID", addr) + return nil, ErrNetAddressNoID{addr} } return NewNetAddressStringWithOptionalID(addr) } @@ -77,11 +80,12 @@ func NewNetAddressStringWithOptionalID(addr string) (*NetAddress, error) { idStr := spl[0] idBytes, err := hex.DecodeString(idStr) if err != nil { - return nil, cmn.ErrorWrap(err, fmt.Sprintf("Address (%s) contains invalid ID", addrWithoutProtocol)) + return nil, ErrNetAddressInvalid{addrWithoutProtocol, err} } if len(idBytes) != IDByteLength { - return nil, fmt.Errorf("Address (%s) contains ID of invalid length (%d). Should be %d hex-encoded bytes", - addrWithoutProtocol, len(idBytes), IDByteLength) + return nil, ErrNetAddressInvalid{ + addrWithoutProtocol, + fmt.Errorf("invalid hex length - got %d, expected %d", len(idBytes), IDByteLength)} } id, addrWithoutProtocol = ID(idStr), spl[1] @@ -89,7 +93,7 @@ func NewNetAddressStringWithOptionalID(addr string) (*NetAddress, error) { host, portStr, err := net.SplitHostPort(addrWithoutProtocol) if err != nil { - return nil, err + return nil, ErrNetAddressInvalid{addrWithoutProtocol, err} } ip := net.ParseIP(host) @@ -97,7 +101,7 @@ func NewNetAddressStringWithOptionalID(addr string) (*NetAddress, error) { if len(host) > 0 { ips, err := net.LookupIP(host) if err != nil { - return nil, err + return nil, ErrNetAddressLookup{host, err} } ip = ips[0] } @@ -105,7 +109,7 @@ func NewNetAddressStringWithOptionalID(addr string) (*NetAddress, error) { port, err := strconv.ParseUint(portStr, 10, 16) if err != nil { - return nil, err + return nil, ErrNetAddressInvalid{portStr, err} } na := NewNetAddressIPPort(ip, uint16(port)) @@ -121,7 +125,7 @@ func NewNetAddressStrings(addrs []string) ([]*NetAddress, []error) { for _, addr := range addrs { netAddr, err := NewNetAddressString(addr) if err != nil { - errs = append(errs, fmt.Errorf("Error in address %s: %v", addr, err)) + errs = append(errs, err) } else { netAddrs = append(netAddrs, netAddr) } diff --git a/p2p/node_info.go b/p2p/node_info.go index 73af1a4af..7d149082c 100644 --- a/p2p/node_info.go +++ b/p2p/node_info.go @@ -21,6 +21,7 @@ func MaxNodeInfoSize() int { // between two peers during the Tendermint P2P handshake. type NodeInfo struct { // Authenticate + // TODO: replace with NetAddress ID ID `json:"id"` // authenticated identifier ListenAddr string `json:"listen_addr"` // accepting incoming @@ -37,7 +38,9 @@ type NodeInfo struct { // Validate checks the self-reported NodeInfo is safe. // It returns an error if there -// are too many Channels or any duplicate Channels. +// are too many Channels, if there are any duplicate Channels, +// if the ListenAddr is malformed, or if the ListenAddr is a host name +// that can not be resolved to some IP. // TODO: constraints for Moniker/Other? Or is that for the UI ? func (info NodeInfo) Validate() error { if len(info.Channels) > maxNumChannels { @@ -52,6 +55,13 @@ func (info NodeInfo) Validate() error { } channels[ch] = struct{}{} } + + // ensure ListenAddr is good + netAddr, err := NewNetAddressString(IDAddressString(info.ID, info.ListenAddr)) + if err != nil { + return err + } + return nil } @@ -116,7 +126,14 @@ OUTER_LOOP: func (info NodeInfo) NetAddress() *NetAddress { netAddr, err := NewNetAddressString(IDAddressString(info.ID, info.ListenAddr)) if err != nil { - panic(err) // everything should be well formed by now + switch err.(type) { + case ErrNetAddressLookup: + // XXX If the peer provided a host name and the lookup fails here + // we're out of luck. + // TODO: use a NetAddress in NodeInfo + default: + panic(err) // everything should be well formed by now + } } return netAddr } From aaa81092e75b22d97503ac0944779d3450045285 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 15:01:33 -0400 Subject: [PATCH 12/30] p2p: some comments and a log line --- docs/specification/new-spec/reactors/pex/pex.md | 5 +---- p2p/pex/addrbook.go | 2 ++ p2p/pex/pex_reactor.go | 12 +++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/specification/new-spec/reactors/pex/pex.md b/docs/specification/new-spec/reactors/pex/pex.md index 0173704e2..d24271ebf 100644 --- a/docs/specification/new-spec/reactors/pex/pex.md +++ b/docs/specification/new-spec/reactors/pex/pex.md @@ -83,10 +83,7 @@ Connection attempts are made with exponential backoff (plus jitter). Because the selection process happens every `ensurePeersPeriod`, we might not end up dialing a peer for much longer than the backoff duration. - -TODO -PEX: if we fail to conenct to the peer after 16 tries (with exponential backoff), we remove from address book completely. - +If we fail to conenct to the peer after 16 tries (with exponential backoff), we remove from address book completely. ## Select Peers to Exchange diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index 08646ae76..aff07b662 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -295,6 +295,7 @@ func (a *addrBook) MarkBad(addr *p2p.NetAddress) { // GetSelection implements AddrBook. // It randomly selects some addresses (old & new). Suitable for peer-exchange protocols. +// Must never return a nil address. func (a *addrBook) GetSelection() []*p2p.NetAddress { a.mtx.Lock() defer a.mtx.Unlock() @@ -332,6 +333,7 @@ func (a *addrBook) GetSelection() []*p2p.NetAddress { // GetSelectionWithBias implements AddrBook. // It randomly selects some addresses (old & new). Suitable for peer-exchange protocols. +// Must never return a nil address. // // Each address is picked randomly from an old or new bucket according to the // biasTowardsNewAddrs argument, which must be between [0, 100] (or else is truncated to diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index dfe54e0d6..82b772a64 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -301,7 +301,7 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { srcAddr := src.NodeInfo().NetAddress() for _, netAddr := range addrs { - // TODO: make sure correct nodes never send nil and return error + // NOTE: GetSelection methods should never return nil addrs if netAddr == nil { return cmn.NewError("received nil addr") } @@ -316,7 +316,13 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { err := r.book.AddAddress(netAddr, srcAddr) if err != nil { - r.Logger.Error("Failed to add new address", "err", err) + switch err.(type) { + case ErrAddrBookNilAddr: + r.Logger.Error("Failed to add new address", "err", err) + default: + // Could be non-routable, self, or full book. But not worth logging an Error + r.Logger.Debug("Failed to add new address", "err", err) + } } } return nil @@ -410,7 +416,7 @@ func (r *PEXReactor) ensurePeers() { } // TODO: consider moving some checks from toDial into here // so we don't even consider dialing peers that we want to wait - // before dialling again, or have dialled too many times already + // before dialling again, or have dialed too many times already r.Logger.Info("Will dial address", "addr", try) toDial[try.ID] = try } From 268055e549ddd697ea10b76bb09ed5ae6eee6b1c Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 15:04:37 -0400 Subject: [PATCH 13/30] node: remove dup code from rebase --- node/node.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/node/node.go b/node/node.go index 00d760664..408dee7ef 100644 --- a/node/node.go +++ b/node/node.go @@ -6,7 +6,6 @@ import ( "fmt" "net" "net/http" - "strings" abci "github.com/tendermint/abci/types" amino "github.com/tendermint/go-amino" @@ -280,14 +279,6 @@ func NewNode(config *cfg.Config, addrBook = pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) - var seeds []string - if config.P2P.Seeds != "" { - seeds = strings.Split(config.P2P.Seeds, ",") - } - var privatePeerIDs []string - if config.P2P.PrivatePeerIDs != "" { - privatePeerIDs = strings.Split(config.P2P.PrivatePeerIDs, ",") - } // TODO persistent peers ? so we can have their DNS addrs saved pexReactor := pex.NewPEXReactor(addrBook, &pex.PEXReactorConfig{ From 0450e35d679892dc628e0bac19952c5020462f22 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 15:19:33 -0400 Subject: [PATCH 14/30] some comments --- p2p/netaddress.go | 4 +++- p2p/pex/pex_reactor.go | 7 +------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/p2p/netaddress.go b/p2p/netaddress.go index 80b8e18bb..3e0d99d69 100644 --- a/p2p/netaddress.go +++ b/p2p/netaddress.go @@ -22,7 +22,9 @@ type NetAddress struct { ID ID `json:"id"` IP net.IP `json:"ip"` Port uint16 `json:"port"` - Name string `json:"name"` // optional DNS name + + // TODO: + // Name string `json:"name"` // optional DNS name // memoize .String() str string diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 82b772a64..b139e4aba 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -126,9 +126,7 @@ func (r *PEXReactor) OnStart() error { } // return err if user provided a bad seed address - // NOTE: only if its an invalid address. - // If we simply fail to resovle a DNS name, - // we shouldn't exit here ... + // or a host name that we cant resolve if err := r.checkSeeds(); err != nil { return err } @@ -500,9 +498,6 @@ func (r *PEXReactor) checkSeeds() error { if lSeeds == 0 { return nil } - // TODO: don't exit the program if we simply cant resolve a DNS name. - // But if names or addresses are incorrectly speficied (ie. invalid), - // then we should return an err that causes an exit _, errs := p2p.NewNetAddressStrings(r.config.Seeds) for _, err := range errs { if err != nil { From 64569b15e5aaea7024f1fd26d2c3f72e18d7e7a5 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 15:39:09 -0400 Subject: [PATCH 15/30] fix build and test --- CHANGELOG.md | 12 +++++++++++- p2p/errors.go | 6 +++--- p2p/node_info.go | 2 +- p2p/pex/addrbook.go | 2 +- p2p/pex/params.go | 2 +- p2p/pex/pex_reactor.go | 4 ++-- p2p/test_util.go | 4 +++- 7 files changed, 22 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2bdb3f18..f1154baae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,9 +26,19 @@ BUG FIXES: ## 0.19.2 (TBD) +FEATURES: + +- [p2p] Allow peers with different Minor versions to connect + +IMPROVEMENTS: + +- [p2p] Various code comments cleanup + BUG FIXES: -- Fix reconnect to persistent peer when first dial fails +- [p2p] Fix reconnect to persistent peer when first dial fails +- [p2p] Validate NodeInfo.ListenAddr +- [p2p/pex] Limit max msg size to 64kB ## 0.19.1 (April 27th, 2018) diff --git a/p2p/errors.go b/p2p/errors.go index 91b8f96c2..f4a09e6c0 100644 --- a/p2p/errors.go +++ b/p2p/errors.go @@ -26,7 +26,7 @@ type ErrNetAddressNoID struct { } func (e ErrNetAddressNoID) Error() string { - return fmt.Errorf("Address (%s) does not contain ID", e.Addr) + return fmt.Sprintf("Address (%s) does not contain ID", e.Addr) } type ErrNetAddressInvalid struct { @@ -35,7 +35,7 @@ type ErrNetAddressInvalid struct { } func (e ErrNetAddressInvalid) Error() string { - return fmt.Errorf("Invalid address (%s): %v", e.Addr, e.Err) + return fmt.Sprintf("Invalid address (%s): %v", e.Addr, e.Err) } type ErrNetAddressLookup struct { @@ -44,5 +44,5 @@ type ErrNetAddressLookup struct { } func (e ErrNetAddressLookup) Error() string { - return fmt.Errorf("Error looking up host (%s): %v", e.Addr, e.Err) + return fmt.Sprintf("Error looking up host (%s): %v", e.Addr, e.Err) } diff --git a/p2p/node_info.go b/p2p/node_info.go index 7d149082c..5e08cde1e 100644 --- a/p2p/node_info.go +++ b/p2p/node_info.go @@ -57,7 +57,7 @@ func (info NodeInfo) Validate() error { } // ensure ListenAddr is good - netAddr, err := NewNetAddressString(IDAddressString(info.ID, info.ListenAddr)) + _, err := NewNetAddressString(IDAddressString(info.ID, info.ListenAddr)) if err != nil { return err } diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index aff07b662..a93773472 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -623,7 +623,7 @@ func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { } // TODO: we should track ourAddrs by ID and by IP:PORT and refuse both. if _, ok := a.ourAddrs[addr.String()]; ok { - return ErrAddrBookSelf + return ErrAddrBookSelf{addr} } ka := a.addrLookup[addr.ID] diff --git a/p2p/pex/params.go b/p2p/pex/params.go index f94e1021c..29b4d45ab 100644 --- a/p2p/pex/params.go +++ b/p2p/pex/params.go @@ -50,6 +50,6 @@ const ( minGetSelection = 32 // max addresses returned by GetSelection - // NOTE: this must match "maxPexMessageSize" + // NOTE: this must match "maxMsgSize" maxGetSelection = 250 ) diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index b139e4aba..bb770e7a8 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -26,8 +26,8 @@ const ( maxAddressSize = 256 // NOTE: amplificaiton factor! - // small request results in up to maxPexMessageSize response - maxPexMessageSize = maxAddressSize * maxGetSelection + // small request results in up to maxMsgSize response + maxMsgSize = maxAddressSize * maxGetSelection // ensure we have enough peers defaultEnsurePeersPeriod = 30 * time.Second diff --git a/p2p/test_util.go b/p2p/test_util.go index fe5282a90..2c90bf516 100644 --- a/p2p/test_util.go +++ b/p2p/test_util.go @@ -49,6 +49,8 @@ func CreateRoutableAddr() (addr string, netAddr *NetAddress) { //------------------------------------------------------------------ // Connects switches via arbitrary net.Conn. Used for testing. +const TEST_HOST = "localhost" + // MakeConnectedSwitches returns n switches, connected according to the connect func. // If connect==Connect2Switches, the switches will be fully connected. // initSwitch defines how the i'th switch should be initialized (ie. with what reactors). @@ -56,7 +58,7 @@ func CreateRoutableAddr() (addr string, netAddr *NetAddress) { func MakeConnectedSwitches(cfg *cfg.P2PConfig, n int, initSwitch func(int, *Switch) *Switch, connect func([]*Switch, int, int)) []*Switch { switches := make([]*Switch, n) for i := 0; i < n; i++ { - switches[i] = MakeSwitch(cfg, i, "testing", "123.123.123", initSwitch) + switches[i] = MakeSwitch(cfg, i, TEST_HOST, "123.123.123", initSwitch) } if err := StartSwitches(switches); err != nil { From 2761861b6bef5ca3366bb898ca588e1e8e88a03f Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 15:52:05 -0400 Subject: [PATCH 16/30] p2p: MinNumOutboundPeers. Closes #1501 --- p2p/pex/pex_reactor.go | 2 +- p2p/switch.go | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index bb770e7a8..357926495 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -31,7 +31,7 @@ const ( // ensure we have enough peers defaultEnsurePeersPeriod = 30 * time.Second - defaultMinNumOutboundPeers = 10 + defaultMinNumOutboundPeers = p2p.DefaultMinNumOutboundPeers // Seed/Crawler constants diff --git a/p2p/switch.go b/p2p/switch.go index 7a06a69f0..1f0c01f02 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -26,6 +26,10 @@ const ( // ie. 3**10 = 16hrs reconnectBackOffAttempts = 10 reconnectBackOffBaseSeconds = 3 + + // keep at least this many outbound peers + // TODO: move to config + DefaultMinNumOutboundPeers = 10 ) //----------------------------------------------------------------------------- @@ -458,7 +462,8 @@ func (sw *Switch) listenerRoutine(l Listener) { } // ignore connection if we already have enough - maxPeers := sw.config.MaxNumPeers + // leave room for MinNumOutboundPeers + maxPeers := sw.config.MaxNumPeers - DefaultMinNumOutboundPeers if maxPeers <= sw.peers.Size() { sw.Logger.Info("Ignoring inbound connection: already have enough peers", "address", inConn.RemoteAddr().String(), "numPeers", sw.peers.Size(), "max", maxPeers) continue From 6805ddf1b8d5ffa904b83be1e74b09c64bb0c6c5 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 16:00:45 -0400 Subject: [PATCH 17/30] p2p: change some logs from Error to Debug. #1476 --- p2p/pex/pex_reactor.go | 31 +++++++++++++------------------ p2p/switch.go | 9 ++++++--- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 357926495..b26e7d3af 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -181,16 +181,19 @@ func (r *PEXReactor) AddPeer(p Peer) { // add to book. dont RequestAddrs right away because // we don't trust inbound as much - let ensurePeersRoutine handle it. err := r.book.AddAddress(addr, src) - if err != nil { - switch err.(type) { - case ErrAddrBookNilAddr: - r.Logger.Error("Failed to add new address", "err", err) - default: - // non-routable, self, full book, etc. - r.Logger.Debug("Failed to add new address", "err", err) - } - } + r.logErrAddrBook(err) + } +} +func (r *PEXReactor) logErrAddrBook(err error) { + if err != nil { + switch err.(type) { + case ErrAddrBookNilAddr: + r.Logger.Error("Failed to add new address", "err", err) + default: + // non-routable, self, full book, etc. + r.Logger.Debug("Failed to add new address", "err", err) + } } } @@ -313,15 +316,7 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { } err := r.book.AddAddress(netAddr, srcAddr) - if err != nil { - switch err.(type) { - case ErrAddrBookNilAddr: - r.Logger.Error("Failed to add new address", "err", err) - default: - // Could be non-routable, self, or full book. But not worth logging an Error - r.Logger.Debug("Failed to add new address", "err", err) - } - } + r.logErrAddrBook(err) } return nil } diff --git a/p2p/switch.go b/p2p/switch.go index 1f0c01f02..5d0bb4f88 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -401,7 +401,12 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b sw.randomSleep(0) err := sw.DialPeerWithAddress(addr, persistent) if err != nil { - sw.Logger.Error("Error dialing peer", "err", err) + switch err.(type) { + case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeer: + sw.Logger.Debug("Error dialing peer", "err", err) + default: + sw.Logger.Error("Error dialing peer", "err", err) + } } }(i) } @@ -500,7 +505,6 @@ func (sw *Switch) addOutboundPeerWithConfig(addr *NetAddress, config *PeerConfig sw.Logger.Info("Dialing peer", "address", addr) peerConn, err := newOutboundPeerConn(addr, config, persistent, sw.nodeKey.PrivKey) if err != nil { - sw.Logger.Error("Failed to dial peer", "address", addr, "err", err) if persistent { go sw.reconnectToPeer(addr) } @@ -508,7 +512,6 @@ func (sw *Switch) addOutboundPeerWithConfig(addr *NetAddress, config *PeerConfig } if err := sw.addPeer(peerConn); err != nil { - sw.Logger.Error("Failed to add peer", "address", addr, "err", err) peerConn.CloseConn() return err } From c90bf7756695be62218e9761b10e7a751647082b Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 16:09:12 -0400 Subject: [PATCH 18/30] rpc: add n_peers to /net_info --- CHANGELOG.md | 5 ++++- p2p/switch.go | 2 +- rpc/core/net.go | 7 +++++-- rpc/core/types/responses.go | 1 + 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1154baae..e3560666a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,15 +29,18 @@ BUG FIXES: FEATURES: - [p2p] Allow peers with different Minor versions to connect +- [rpc] `/net_info` includes `n_peers` IMPROVEMENTS: -- [p2p] Various code comments cleanup +- [p2p] Various code comments, cleanup, error types +- [p2p] Change some Error logs to Debug BUG FIXES: - [p2p] Fix reconnect to persistent peer when first dial fails - [p2p] Validate NodeInfo.ListenAddr +- [p2p] Only allow (MaxNumPeers - MaxNumOutboundPeers) inbound peers - [p2p/pex] Limit max msg size to 64kB ## 0.19.1 (April 27th, 2018) diff --git a/p2p/switch.go b/p2p/switch.go index 5d0bb4f88..ffc1cbb8e 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -401,7 +401,7 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b sw.randomSleep(0) err := sw.DialPeerWithAddress(addr, persistent) if err != nil { - switch err.(type) { + switch err { case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeer: sw.Logger.Debug("Error dialing peer", "err", err) default: diff --git a/rpc/core/net.go b/rpc/core/net.go index d3123b9eb..2e931d067 100644 --- a/rpc/core/net.go +++ b/rpc/core/net.go @@ -23,6 +23,7 @@ import ( // { // "error": "", // "result": { +// "n_peers": 0, // "peers": [], // "listeners": [ // "Listener(@10.0.2.15:46656)" @@ -47,11 +48,13 @@ func NetInfo() (*ctypes.ResultNetInfo, error) { ConnectionStatus: peer.Status(), }) } - // TODO: should we include "num_peers" field for convenience ? - // Let's also include the PersistentPeers and Seeds in here. + // TODO: Should we include PersistentPeers and Seeds in here? + // PRO: useful info + // CON: privacy return &ctypes.ResultNetInfo{ Listening: listening, Listeners: listeners, + NPeers: len(peers), Peers: peers, }, nil } diff --git a/rpc/core/types/responses.go b/rpc/core/types/responses.go index be7d51f20..1df7038ae 100644 --- a/rpc/core/types/responses.go +++ b/rpc/core/types/responses.go @@ -100,6 +100,7 @@ func (s *ResultStatus) TxIndexEnabled() bool { type ResultNetInfo struct { Listening bool `json:"listening"` Listeners []string `json:"listeners"` + NPeers int `json:"n_peers"` Peers []Peer `json:"peers"` } From 6157c700dd7efc3dc6e6ffe1efb182de70c1c72e Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 16:15:30 -0400 Subject: [PATCH 19/30] forgot errors file --- p2p/pex/errors.go | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 p2p/pex/errors.go diff --git a/p2p/pex/errors.go b/p2p/pex/errors.go new file mode 100644 index 000000000..6d5451e3f --- /dev/null +++ b/p2p/pex/errors.go @@ -0,0 +1,41 @@ +package pex + +import ( + "fmt" + + "github.com/tendermint/tendermint/p2p" +) + +type ErrAddrBookNonRoutable struct { + Addr *p2p.NetAddress +} + +func (err ErrAddrBookNonRoutable) Error() string { + return fmt.Sprintf("Cannot add non-routable address %v", err.Addr) +} + +type ErrAddrBookSelf struct { + Addr *p2p.NetAddress +} + +func (err ErrAddrBookSelf) Error() string { + return fmt.Sprintf("Cannot add ourselves with address %v", err.Addr) +} + +type ErrAddrBookNilAddr struct { + Addr *p2p.NetAddress + Src *p2p.NetAddress +} + +func (err ErrAddrBookNilAddr) Error() string { + return fmt.Sprintf("Cannot add a nil address. Got (addr, src) = (%v, %v)", err.Addr, err.Src) +} + +type ErrAddrBookFull struct { + Addr *p2p.NetAddress + Size int +} + +func (err ErrAddrBookFull) Error() string { + return fmt.Sprintf("Can't add new address (%v), addr book is full (%d)", err.Addr, err.Size) +} From 3498b676a6cd26f1b7011bec3b168a6e6062c75e Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 17:14:58 -0400 Subject: [PATCH 20/30] update spec and addrbook.go --- docs/specification/new-spec/p2p/node.md | 2 +- docs/specification/new-spec/p2p/peer.md | 21 +++++++------ .../new-spec/reactors/pex/pex.md | 5 +-- p2p/pex/addrbook.go | 31 ++++++------------- p2p/pex/errors.go | 9 ------ 5 files changed, 24 insertions(+), 44 deletions(-) diff --git a/docs/specification/new-spec/p2p/node.md b/docs/specification/new-spec/p2p/node.md index 589e3b436..c54cfeb32 100644 --- a/docs/specification/new-spec/p2p/node.md +++ b/docs/specification/new-spec/p2p/node.md @@ -12,7 +12,7 @@ Seeds should operate full nodes with the PEX reactor in a "crawler" mode that continuously explores to validate the availability of peers. Seeds should only respond with some top percentile of the best peers it knows about. -See [reputation](TODO) for details on peer quality. +See [the peer-exchange docs](/docs/specification/new-spec/reactors/pex/pex.md)for details on peer quality. ## New Full Node diff --git a/docs/specification/new-spec/p2p/peer.md b/docs/specification/new-spec/p2p/peer.md index 6aa36af79..b2808a60b 100644 --- a/docs/specification/new-spec/p2p/peer.md +++ b/docs/specification/new-spec/p2p/peer.md @@ -2,24 +2,23 @@ This document explains how Tendermint Peers are identified and how they connect to one another. -For details on peer discovery, see the [peer exchange (PEX) reactor doc](pex.md). +For details on peer discovery, see the [peer exchange (PEX) reactor doc](/docs/specification/new-spec/reactors/pex/pex.md). ## Peer Identity Tendermint peers are expected to maintain long-term persistent identities in the form of a public key. Each peer has an ID defined as `peer.ID == peer.PubKey.Address()`, where `Address` uses the scheme defined in go-crypto. -A single peer ID can have multiple IP addresses associated with it. -TODO: define how to deal with this. +A single peer ID can have multiple IP addresses associated with it, but a node +will only ever connect to one at a time. When attempting to connect to a peer, we use the PeerURL: `@:`. We will attempt to connect to the peer at IP:PORT, and verify, via authenticated encryption, that it is in possession of the private key corresponding to ``. This prevents man-in-the-middle attacks on the peer layer. -Peers can also be connected to without specifying an ID, ie. just `:`. -In this case, the peer must be authenticated out-of-band of Tendermint, -for instance via VPN. +If `auth_enc = false`, peers can use an arbitrary ID, but they must always use +one. Authentication can then happen out-of-band of Tendermint, for instance via VPN. ## Connections @@ -84,12 +83,13 @@ The Tendermint Version Handshake allows the peers to exchange their NodeInfo: ```golang type NodeInfo struct { ID p2p.ID - Moniker string - Network string - RemoteAddr string ListenAddr string + + Network string Version string Channels []int8 + + Moniker string Other []string } ``` @@ -98,9 +98,10 @@ The connection is disconnected if: - `peer.NodeInfo.ID` is not equal `peerConn.ID` - `peer.NodeInfo.Version` is not formatted as `X.X.X` where X are integers known as Major, Minor, and Revision - `peer.NodeInfo.Version` Major is not the same as ours -- `peer.NodeInfo.Version` Minor is not the same as ours - `peer.NodeInfo.Network` is not the same as ours - `peer.Channels` does not intersect with our known Channels. +- `peer.NodeInfo.ListenAddr` is malformed or is a DNS host that cannot be + resolved At this point, if we have not disconnected, the peer is valid. diff --git a/docs/specification/new-spec/reactors/pex/pex.md b/docs/specification/new-spec/reactors/pex/pex.md index d24271ebf..1c0ba1c3d 100644 --- a/docs/specification/new-spec/reactors/pex/pex.md +++ b/docs/specification/new-spec/reactors/pex/pex.md @@ -31,8 +31,9 @@ we try the seeds again. ## Listening -Peers listen on a configurable ListenAddr that they self-report during -handshakes with other peers. +Peers listen on a configurable ListenAddr that they self-report in their +NodeInfo during handshakes with other peers. Peers accept up to (MaxNumPeers - +MinNumOutboundPeers) incoming peers. ## Address Book diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index a93773472..e34bd5ab9 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -493,11 +493,11 @@ func (a *addrBook) getBucket(bucketType byte, bucketIdx int) map[string]*knownAd // Adds ka to new bucket. Returns false if it couldn't do it cuz buckets full. // NOTE: currently it always returns true. -func (a *addrBook) addToNewBucket(ka *knownAddress, bucketIdx int) bool { +func (a *addrBook) addToNewBucket(ka *knownAddress, bucketIdx int) { // Sanity check if ka.isOld() { - a.Logger.Error(cmn.Fmt("Cannot add address already in old bucket to a new bucket: %v", ka)) - return false + a.Logger.Error("Failed Sanity Check! Cant add old address to new bucket", "ka", knownAddress, "bucket", bucketIdx) + return } addrStr := ka.Addr.String() @@ -505,7 +505,7 @@ func (a *addrBook) addToNewBucket(ka *knownAddress, bucketIdx int) bool { // Already exists? if _, ok := bucket[addrStr]; ok { - return true + return } // Enforce max addresses. @@ -523,8 +523,6 @@ func (a *addrBook) addToNewBucket(ka *knownAddress, bucketIdx int) bool { // Add it to addrLookup a.addrLookup[ka.ID()] = ka - - return true } // Adds ka to old bucket. Returns false if it couldn't do it cuz buckets full. @@ -627,7 +625,6 @@ func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { } ka := a.addrLookup[addr.ID] - if ka != nil { // Already old. if ka.isOld() { @@ -647,10 +644,7 @@ func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { } bucket := a.calcNewBucket(addr, src) - added := a.addToNewBucket(ka, bucket) - if !added { - return ErrAddrBookFull{addr, a.size()} - } + a.addToNewBucket(ka, bucket) return nil } @@ -696,20 +690,13 @@ func (a *addrBook) moveToOld(ka *knownAddress) { oldBucketIdx := a.calcOldBucket(ka.Addr) added := a.addToOldBucket(ka, oldBucketIdx) if !added { - // No room, must evict something + // No room; move the oldest to a new bucket oldest := a.pickOldest(bucketTypeOld, oldBucketIdx) a.removeFromBucket(oldest, bucketTypeOld, oldBucketIdx) - // Find new bucket to put oldest in newBucketIdx := a.calcNewBucket(oldest.Addr, oldest.Src) - added := a.addToNewBucket(oldest, newBucketIdx) - // No space in newBucket either, just put it in freedBucket from above. - if !added { - added := a.addToNewBucket(oldest, freedBucket) - if !added { - a.Logger.Error(cmn.Fmt("Could not migrate oldest %v to freedBucket %v", oldest, freedBucket)) - } - } - // Finally, add to bucket again. + a.addToNewBucket(oldest, newBucketIdx) + + // Finally, add our ka to old bucket again. added = a.addToOldBucket(ka, oldBucketIdx) if !added { a.Logger.Error(cmn.Fmt("Could not re-add ka %v to oldBucketIdx %v", ka, oldBucketIdx)) diff --git a/p2p/pex/errors.go b/p2p/pex/errors.go index 6d5451e3f..0b8bf4715 100644 --- a/p2p/pex/errors.go +++ b/p2p/pex/errors.go @@ -30,12 +30,3 @@ type ErrAddrBookNilAddr struct { func (err ErrAddrBookNilAddr) Error() string { return fmt.Sprintf("Cannot add a nil address. Got (addr, src) = (%v, %v)", err.Addr, err.Src) } - -type ErrAddrBookFull struct { - Addr *p2p.NetAddress - Size int -} - -func (err ErrAddrBookFull) Error() string { - return fmt.Sprintf("Can't add new address (%v), addr book is full (%d)", err.Addr, err.Size) -} From 3a30ee75b994cee85afa0569587e42030ef4539d Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 17:27:51 -0400 Subject: [PATCH 21/30] minor fixes --- docs/specification/new-spec/reactors/pex/pex.md | 12 ++++++------ p2p/pex/addrbook.go | 4 +--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/specification/new-spec/reactors/pex/pex.md b/docs/specification/new-spec/reactors/pex/pex.md index 1c0ba1c3d..c879c336a 100644 --- a/docs/specification/new-spec/reactors/pex/pex.md +++ b/docs/specification/new-spec/reactors/pex/pex.md @@ -35,22 +35,22 @@ Peers listen on a configurable ListenAddr that they self-report in their NodeInfo during handshakes with other peers. Peers accept up to (MaxNumPeers - MinNumOutboundPeers) incoming peers. - ## Address Book Peers are tracked via their ID (their PubKey.Address()). -For each ID, the address book keeps the most recent IP:PORT. Peers are added to the address book from the PEX when they first connect to us or when we hear about them from other peers. The address book is arranged in sets of buckets, and distinguishes between vetted (old) and unvetted (new) peers. It keeps different sets of buckets for vetted and -unvetted peers. Buckets provide randomization over peer selection. +unvetted peers. Buckets provide randomization over peer selection. Peers are put +in buckets according to their IP groups. -A vetted peer can only be in one bucket. An unvetted peer can be in multiple buckets. +A vetted peer can only be in one bucket. An unvetted peer can be in multiple buckets, and +each instance of the peer can have a different IP:PORT. -If there's no space in the book, we check the bucket for bad peers, which include peers we've attempted to -dial 3 times, and then remove them from only that bucket. +If we're trying to add a new peer but there's no space in its bucket, we'll +remove the worst peer from that bucket to make room. ## Vetting diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index e34bd5ab9..6521b6491 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -496,7 +496,7 @@ func (a *addrBook) getBucket(bucketType byte, bucketIdx int) map[string]*knownAd func (a *addrBook) addToNewBucket(ka *knownAddress, bucketIdx int) { // Sanity check if ka.isOld() { - a.Logger.Error("Failed Sanity Check! Cant add old address to new bucket", "ka", knownAddress, "bucket", bucketIdx) + a.Logger.Error("Failed Sanity Check! Cant add old address to new bucket", "ka", ka, "bucket", bucketIdx) return } @@ -679,8 +679,6 @@ func (a *addrBook) moveToOld(ka *knownAddress) { return } - // Remember one of the buckets in which ka is in. - freedBucket := ka.Buckets[0] // Remove from all (new) buckets. a.removeFromAllBuckets(ka) // It's officially old now. From fae94a44a2d6bbd7fcef548200b6c26ec668795c Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 20:07:38 -0400 Subject: [PATCH 22/30] p2p/pex: some addrbook fixes * fix before/after in isBad() * allow multiple IPs per ID even if ID isOld --- p2p/pex/addrbook.go | 4 ++-- p2p/pex/known_address.go | 11 +++++------ p2p/switch.go | 18 +++++++++++------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index 6521b6491..5b4428f5e 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -626,8 +626,8 @@ func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { ka := a.addrLookup[addr.ID] if ka != nil { - // Already old. - if ka.isOld() { + // If its already old and the addr is the same, ignore it. + if ka.isOld() && ka.Addr.Equals(addr) { return nil } // Already in max new buckets. diff --git a/p2p/pex/known_address.go b/p2p/pex/known_address.go index 0261e4902..5673dec11 100644 --- a/p2p/pex/known_address.go +++ b/p2p/pex/known_address.go @@ -106,7 +106,6 @@ func (ka *knownAddress) removeBucketRef(bucketIdx int) int { All addresses that meet these criteria are assumed to be worthless and not worth keeping hold of. - XXX: so a good peer needs us to call MarkGood before the conditions above are reached! */ func (ka *knownAddress) isBad() bool { // Is Old --> good @@ -115,14 +114,15 @@ func (ka *knownAddress) isBad() bool { } // Has been attempted in the last minute --> good - if ka.LastAttempt.Before(time.Now().Add(-1 * time.Minute)) { + if ka.LastAttempt.After(time.Now().Add(-1 * time.Minute)) { return false } + // TODO: From the future? + // Too old? - // XXX: does this mean if we've kept a connection up for this long we'll disconnect?! - // and shouldn't it be .Before ? - if ka.LastAttempt.After(time.Now().Add(-1 * numMissingDays * time.Hour * 24)) { + // TODO: should be a timestamp of last seen, not just last attempt + if ka.LastAttempt.Before(time.Now().Add(-1 * numMissingDays * time.Hour * 24)) { return true } @@ -132,7 +132,6 @@ func (ka *knownAddress) isBad() bool { } // Hasn't succeeded in too long? - // XXX: does this mean if we've kept a connection up for this long we'll disconnect?! if ka.LastSuccess.Before(time.Now().Add(-1*minBadDays*time.Hour*24)) && ka.Attempts >= maxFailures { return true diff --git a/p2p/switch.go b/p2p/switch.go index ffc1cbb8e..bccae393b 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -290,6 +290,8 @@ func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { // to the PEX/Addrbook to find the peer with the addr again // NOTE: this will keep trying even if the handshake or auth fails. // TODO: be more explicit with error types so we only retry on certain failures +// - ie. if we're getting ErrDuplicatePeer we can stop +// because the addrbook got us the peer back already func (sw *Switch) reconnectToPeer(addr *NetAddress) { if sw.reconnecting.Has(string(addr.ID)) { return @@ -305,14 +307,14 @@ func (sw *Switch) reconnectToPeer(addr *NetAddress) { } err := sw.DialPeerWithAddress(addr, true) - if err != nil { - sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) - // sleep a set amount - sw.randomSleep(reconnectInterval) - continue - } else { - return + if err == nil { + return // success } + + sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr) + // sleep a set amount + sw.randomSleep(reconnectInterval) + continue } sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff", @@ -501,6 +503,8 @@ func (sw *Switch) addInboundPeerWithConfig(conn net.Conn, config *PeerConfig) er // dial the peer; make secret connection; authenticate against the dialed ID; // add the peer. +// if dialing fails, start the reconnect loop. If handhsake fails, its over. +// If peer is started succesffuly, reconnectLoop will start when StopPeerForError is called func (sw *Switch) addOutboundPeerWithConfig(addr *NetAddress, config *PeerConfig, persistent bool) error { sw.Logger.Info("Dialing peer", "address", addr) peerConn, err := newOutboundPeerConn(addr, config, persistent, sw.nodeKey.PrivKey) From d3c4f746a7a95e1280cc497480418d944813321e Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 21:12:25 -0400 Subject: [PATCH 23/30] spec: abci notes. closes #1257 --- docs/specification/new-spec/abci.md | 101 +++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 1 deletion(-) diff --git a/docs/specification/new-spec/abci.md b/docs/specification/new-spec/abci.md index 813aac787..a16dd8fce 100644 --- a/docs/specification/new-spec/abci.md +++ b/docs/specification/new-spec/abci.md @@ -5,15 +5,47 @@ and an application (the actual state machine). The ABCI message types are defined in a [protobuf file](https://github.com/tendermint/abci/blob/master/types/types.proto). + For full details on the ABCI message types and protocol, see the [ABCI specificaiton](https://github.com/tendermint/abci/blob/master/specification.rst). +Be sure to read the specification if you're trying to build an ABCI app! + For additional details on server implementation, see the [ABCI readme](https://github.com/tendermint/abci#implementation). Here we provide some more details around the use of ABCI by Tendermint and clarify common "gotchas". -## Validator Updates +## ABCI connections + +Tendermint opens 3 ABCI connections to the app: one for Consensus, one for +Mempool, one for Queries. + +## Async vs Sync + +The main ABCI server (ie. non-GRPC) provides ordered asynchronous messages. +This is useful for DeliverTx and CheckTx, since it allows Tendermint to forward +transactions to the app before it's finished processing previous ones. + +Thus, DeliverTx and CheckTx messages are sent asycnhronously, while all other +messages are sent synchronously. + +## CheckTx and Commit + +It is typical to hold three distinct states in an ABCI app: CheckTxState, DeliverTxState, +QueryState. The QueryState contains the latest committed state for a block. +The CheckTxState and DeliverTxState may be updated concurrently with one another. +Before Commit is called, Tendermint locks and flushes the mempool so that no new changes will happen +to CheckTxState. When Commit completes, it unlocks the mempool. + +Thus, during Commit, it is safe to reset the QueryState and the CheckTxState to the latest DeliverTxState +(ie. the new state from executing all the txs in the block). + +Note, however, that it is not possible to send transactions to Tendermint during Commit - if your app +tries to send a `/broadcast_tx` to Tendermint during Commit, it will deadlock. + + +## EndBlock Validator Updates Updates to the Tendermint validator set can be made by returning `Validator` objects in the `ResponseBeginBlock`: @@ -67,3 +99,70 @@ using the following paths, with no additional data: If either of these queries return a non-zero ABCI code, Tendermint will refuse to connect to the peer. + +## Info and the Handshake/Replay + +On startup, Tendermint calls Info on the Query connection to get the latest +committed state of the app. The app MUST return information consistent with the +last block it succesfully completed Commit for. + +If the app succesfully committed block H but not H+1, then `last_block_height = +H` and `last_block_app_hash = `. If the app +failed during the Commit of block H, then `last_block_height = H-1` and +`last_block_app_hash = `. + +We now distinguish three heights, and describe how Tendermint syncs itself with +the app. + +``` +storeBlockHeight = height of the last block Tendermint saw a commit for +stateBlockHeight = height of the last block for which Tendermint completed all + block processing and saved all ABCI results to disk +appBlockHeight = height of the last block for which ABCI app succesfully + completely Commit + +``` + +Note we always have `storeBlockHeight >= stateBlockHeight` and `storeBlockHeight >= appBlockHeight` +Note also we never call Commit on an ABCI app twice for the same height. + +The procedure is as follows. + +First, some simeple start conditions: + +If `appBlockHeight == 0`, then call InitChain. + +If `storeBlockHeight == 0`, we're done. + +Now, some sanity checks: + +If `storeBlockHeight < appBlockHeight`, error +If `storeBlockHeight < stateBlockHeight`, panic +If `storeBlockHeight > stateBlockHeight+1`, panic + +Now, the meat: + +If `storeBlockHeight == stateBlockHeight && appBlockHeight < storeBlockHeight`, + replay all blocks in full from `appBlockHeight` to `storeBlockHeight`. + This happens if we completed processing the block, but the app forgot its height. + +If `storeBlockHeight == stateBlockHeight && appBlockHeight == storeBlockHeight`, we're done + This happens if we crashed at an opportune spot. + +If `storeBlockHeight == stateBlockHeight+1` + This happens if we started processing the block but didn't finish. + + If `appBlockHeight < stateBlockHeight` + replay all blocks in full from `appBlockHeight` to `storeBlockHeight-1`, + and replay the block at `storeBlockHeight` using the WAL. + This happens if the app forgot the last block it committed. + + If `appBlockHeight == stateBlockHeight`, + replay the last block (storeBlockHeight) in full. + This happens if we crashed before the app finished Commit + + If appBlockHeight == storeBlockHeight { + update the state using the saved ABCI responses but dont run the block against the real app. + This happens if we crashed after the app finished Commit but before Tendermint saved the state. + From c195772de1c9e5a4bf53935ade3e01c0c82577cb Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 28 Apr 2018 21:17:28 -0400 Subject: [PATCH 24/30] p2p: small lint --- p2p/node_info.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/p2p/node_info.go b/p2p/node_info.go index 5e08cde1e..985142aa2 100644 --- a/p2p/node_info.go +++ b/p2p/node_info.go @@ -58,11 +58,7 @@ func (info NodeInfo) Validate() error { // ensure ListenAddr is good _, err := NewNetAddressString(IDAddressString(info.ID, info.ListenAddr)) - if err != nil { - return err - } - - return nil + return err } // CompatibleWith checks if two NodeInfo are compatible with eachother. From 12fc3961016dcff7a2c8ebcf37e68185f6917e8b Mon Sep 17 00:00:00 2001 From: Max Levy <35595512+maxim-levy@users.noreply.github.com> Date: Sun, 29 Apr 2018 17:35:10 +0300 Subject: [PATCH 25/30] Split CMD's param on space (#1523) This is to avoid "ERROR: unknown flag: --proxy_app dummy" message when running "docker-compose up" --- networks/local/localnode/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/networks/local/localnode/Dockerfile b/networks/local/localnode/Dockerfile index 7eeba963d..c4556caa4 100644 --- a/networks/local/localnode/Dockerfile +++ b/networks/local/localnode/Dockerfile @@ -9,7 +9,7 @@ VOLUME [ /tendermint ] WORKDIR /tendermint EXPOSE 46656 46657 ENTRYPOINT ["/usr/bin/wrapper.sh"] -CMD ["node", "--proxy_app dummy"] +CMD ["node", "--proxy_app", "dummy"] STOPSIGNAL SIGTERM COPY wrapper.sh /usr/bin/wrapper.sh From b6c062c451689c2141d535d2491e67244a277e3f Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Mon, 30 Apr 2018 08:19:19 -0400 Subject: [PATCH 26/30] fixes from review --- .../new-spec/reactors/pex/pex.md | 8 +++---- p2p/pex/addrbook.go | 21 ++++++++++++------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/docs/specification/new-spec/reactors/pex/pex.md b/docs/specification/new-spec/reactors/pex/pex.md index c879c336a..44a0e3fd8 100644 --- a/docs/specification/new-spec/reactors/pex/pex.md +++ b/docs/specification/new-spec/reactors/pex/pex.md @@ -20,9 +20,9 @@ Peer discovery begins with a list of seeds. When we have no peers, or have been unable to find enough peers from existing ones, we dial a randomly selected seed to get a list of peers to dial. -On startup, we will also immediately dial the given list of `persisten_peers`, -and will attempt to maintain persistent connections with them. If the connections die, or we fail to dail, -we will redial every 5s for a a few minutes, then switch to an exponential backoff schedule, +On startup, we will also immediately dial the given list of `persistent_peers`, +and will attempt to maintain persistent connections with them. If the connections die, or we fail to dial, +we will redial every 5s for a few minutes, then switch to an exponential backoff schedule, and after about a day of trying, stop dialing the peer. So long as we have less than `MinNumOutboundPeers`, we periodically request additional peers @@ -84,7 +84,7 @@ Connection attempts are made with exponential backoff (plus jitter). Because the selection process happens every `ensurePeersPeriod`, we might not end up dialing a peer for much longer than the backoff duration. -If we fail to conenct to the peer after 16 tries (with exponential backoff), we remove from address book completely. +If we fail to connect to the peer after 16 tries (with exponential backoff), we remove from address book completely. ## Select Peers to Exchange diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index 5b4428f5e..4408c3b91 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -221,7 +221,11 @@ func (a *addrBook) PickAddress(biasTowardsNewAddrs int) *p2p.NetAddress { a.mtx.Lock() defer a.mtx.Unlock() - if a.size() == 0 { + bookSize := a.size() + if bookSize <= 0 { + if bookSize < 0 { + a.Logger.Error("Addrbook size less than 0", "nNew", a.nNew, "nOld", a.nOld) + } return nil } if biasTowardsNewAddrs > 100 { @@ -301,7 +305,10 @@ func (a *addrBook) GetSelection() []*p2p.NetAddress { defer a.mtx.Unlock() bookSize := a.size() - if bookSize == 0 { + if bookSize <= 0 { + if bookSize < 0 { + a.Logger.Error("Addrbook size less than 0", "nNew", a.nNew, "nOld", a.nOld) + } return nil } @@ -344,7 +351,10 @@ func (a *addrBook) GetSelectionWithBias(biasTowardsNewAddrs int) []*p2p.NetAddre defer a.mtx.Unlock() bookSize := a.size() - if bookSize == 0 { + if bookSize <= 0 { + if bookSize < 0 { + a.Logger.Error("Addrbook size less than 0", "nNew", a.nNew, "nOld", a.nOld) + } return nil } @@ -609,10 +619,7 @@ func (a *addrBook) pickOldest(bucketType byte, bucketIdx int) *knownAddress { // adds the address to a "new" bucket. if its already in one, // it only adds it probabilistically func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { - if addr == nil { - return ErrAddrBookNilAddr{addr, src} - } - if src == nil { + if addr == nil || src == nil { return ErrAddrBookNilAddr{addr, src} } From 47557f868a5b18f21aa82230c2b55c08d88783a8 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Mon, 30 Apr 2018 08:31:03 -0400 Subject: [PATCH 27/30] create addrbook even if pex=false. fixes #1525 --- node/node.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/node.go b/node/node.go index 408dee7ef..f0d98d603 100644 --- a/node/node.go +++ b/node/node.go @@ -274,11 +274,11 @@ func NewNode(config *cfg.Config, // if auth_enc=false. // // If PEX is on, it should handle dialing the seeds. Otherwise the switch does it. + // Note we currently use the addrBook regardless at least for AddOurAddress var addrBook pex.AddrBook + addrBook = pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) + addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) if config.P2P.PexReactor { - addrBook = pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) - addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) - // TODO persistent peers ? so we can have their DNS addrs saved pexReactor := pex.NewPEXReactor(addrBook, &pex.PEXReactorConfig{ From f395b82f73f0d683070ca251af5ffdd4b553d0f7 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Mon, 30 Apr 2018 15:59:05 -0400 Subject: [PATCH 28/30] node: remove commented out trustMetric --- node/node.go | 1 - 1 file changed, 1 deletion(-) diff --git a/node/node.go b/node/node.go index f0d98d603..d8bed4816 100644 --- a/node/node.go +++ b/node/node.go @@ -100,7 +100,6 @@ type Node struct { // network sw *p2p.Switch // p2p connections addrBook pex.AddrBook // known peers - // trustMetricStore *trust.TrustMetricStore // trust metrics for all peers // services eventBus *types.EventBus // pub/sub for services From 66c2b6032495236c21c7e978140116995b29ff63 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Mon, 30 Apr 2018 16:06:45 -0400 Subject: [PATCH 29/30] version and changelog --- CHANGELOG.md | 5 ++++- version/version.go | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3560666a..2c1503598 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ BUG FIXES: - Graceful handling/recovery for apps that have non-determinism or fail to halt - Graceful handling/recovery for violations of safety, or liveness -## 0.19.2 (TBD) +## 0.19.2 (April 30th, 2018) FEATURES: @@ -42,6 +42,9 @@ BUG FIXES: - [p2p] Validate NodeInfo.ListenAddr - [p2p] Only allow (MaxNumPeers - MaxNumOutboundPeers) inbound peers - [p2p/pex] Limit max msg size to 64kB +- [p2p] Fix panic when pex=false +- [p2p] Allow multiple IPs per ID in AddrBook +- [p2p] Fix before/after bugs in addrbook isBad() ## 0.19.1 (April 27th, 2018) diff --git a/version/version.go b/version/version.go index 31479b32b..1a2c64962 100644 --- a/version/version.go +++ b/version/version.go @@ -10,7 +10,7 @@ const ( var ( // Version is the current version of Tendermint // Must be a string because scripts like dist.sh read this file. - Version = "0.19.2-dev" + Version = "0.19.2" // GitCommit is the current HEAD set using ldflags. GitCommit string From cae31157b13671b00651cb24a683ab84b424f99f Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Mon, 30 Apr 2018 18:23:02 -0400 Subject: [PATCH 30/30] fix lint --- node/node.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/node/node.go b/node/node.go index d8bed4816..fdc466695 100644 --- a/node/node.go +++ b/node/node.go @@ -274,8 +274,7 @@ func NewNode(config *cfg.Config, // // If PEX is on, it should handle dialing the seeds. Otherwise the switch does it. // Note we currently use the addrBook regardless at least for AddOurAddress - var addrBook pex.AddrBook - addrBook = pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) + addrBook := pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) if config.P2P.PexReactor { // TODO persistent peers ? so we can have their DNS addrs saved