From 936d1a0e681a060969db26edb29ed55605565a1b Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 20 Mar 2018 21:41:08 +0100 Subject: [PATCH] some notes about the p2p layer --- .../new-spec/reactors/pex/pex.md | 27 +++++++-- node/node.go | 42 +++++++++----- p2p/netaddress.go | 1 + p2p/pex/pex_reactor.go | 57 +++++++++++++++---- p2p/switch.go | 6 +- rpc/core/net.go | 2 + 6 files changed, 105 insertions(+), 30 deletions(-) diff --git a/docs/specification/new-spec/reactors/pex/pex.md b/docs/specification/new-spec/reactors/pex/pex.md index d3981bda2..29d65d17d 100644 --- a/docs/specification/new-spec/reactors/pex/pex.md +++ b/docs/specification/new-spec/reactors/pex/pex.md @@ -7,7 +7,8 @@ to good peers and to gossip peers to others. ## Peer Types Certain peers are special in that they are specified by the user as `persistent`, -which means we auto-redial them if the connection fails. +which means we auto-redial them if the connection fails, or if we fail to dial +them. Some peers can be marked as `private`, which means we will not put them in the address book or gossip them to others. @@ -19,6 +20,11 @@ Peer discovery begins with a list of seeds. When we have no peers, or have been unable to find enough peers from existing ones, we dial a randomly selected seed to get a list of peers to dial. +On startup, we will also immediately dial the given list of `persisten_peers`, +and will attempt to maintain persistent connections with them. If the connections die, +we will redial every 5s for a a few minutes, then switch to an exponential backoff schedule, +and after about a day of trying, stop dialing the peer. + So long as we have less than `MaxPeers`, we periodically request additional peers from each of our own. If sufficient time goes by and we still can't find enough peers, we try the seeds again. @@ -68,6 +74,19 @@ Connection attempts are made with exponential backoff (plus jitter). Because the selection process happens every `ensurePeersPeriod`, we might not end up dialing a peer for much longer than the backoff duration. +....... +----------------- +TODO: put this in the right place. And add more details. + +AddrBook: If there's no space in the book, we check the bucket for bad peers, which include peers we've attempted to +dial 3 times, and then remove them from only that bucket. + +PEX: if we fail to conenct to the peer after 16 tries (with exponential backoff), we remove from address book completely. + +----------------- +....... + + ## Select Peers to Exchange When we’re asked for peers, we select them as follows: @@ -86,9 +105,9 @@ Note that the bad behaviour may be detected outside the PEX reactor itself (for instance, in the mconnection, or another reactor), but it must be communicated to the PEX reactor so it can remove and mark the peer. -In the PEX, if a peer sends us unsolicited lists of peers, -or if the peer sends too many requests for more peers in a given amount of time, -we Disconnect and Mark. +In the PEX, if a peer sends us an unsolicited list of peers, +or if the peer sends a request too soon after another one, +we Disconnect and MarkBad. ## Trust Metric diff --git a/node/node.go b/node/node.go index 205e672f3..00d760664 100644 --- a/node/node.go +++ b/node/node.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "net/http" + "strings" abci "github.com/tendermint/abci/types" amino "github.com/tendermint/go-amino" @@ -21,7 +22,6 @@ import ( mempl "github.com/tendermint/tendermint/mempool" "github.com/tendermint/tendermint/p2p" "github.com/tendermint/tendermint/p2p/pex" - "github.com/tendermint/tendermint/p2p/trust" "github.com/tendermint/tendermint/proxy" rpccore "github.com/tendermint/tendermint/rpc/core" ctypes "github.com/tendermint/tendermint/rpc/core/types" @@ -99,9 +99,9 @@ type Node struct { privValidator types.PrivValidator // local node's validator key // network - sw *p2p.Switch // p2p connections - addrBook pex.AddrBook // known peers - trustMetricStore *trust.TrustMetricStore // trust metrics for all peers + sw *p2p.Switch // p2p connections + addrBook pex.AddrBook // known peers + // trustMetricStore *trust.TrustMetricStore // trust metrics for all peers // services eventBus *types.EventBus // pub/sub for services @@ -262,20 +262,33 @@ func NewNode(config *cfg.Config, sw.AddReactor("EVIDENCE", evidenceReactor) // Optionally, start the pex reactor + // + // TODO: + // + // We need to set Seeds and PersistentPeers on the switch, + // since it needs to be able to use these (and their DNS names) + // even if the PEX is off. We can include the DNS name in the NetAddress, + // but it would still be nice to have a clear list of the current "PersistentPeers" + // somewhere that we can return with net_info. + // + // Let's assume we always have IDs ... and we just dont authenticate them + // if auth_enc=false. + // + // If PEX is on, it should handle dialing the seeds. Otherwise the switch does it. var addrBook pex.AddrBook - var trustMetricStore *trust.TrustMetricStore if config.P2P.PexReactor { addrBook = pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) - // Get the trust metric history data - trustHistoryDB, err := dbProvider(&DBContext{"trusthistory", config}) - if err != nil { - return nil, err + var seeds []string + if config.P2P.Seeds != "" { + seeds = strings.Split(config.P2P.Seeds, ",") } - trustMetricStore = trust.NewTrustMetricStore(trustHistoryDB, trust.DefaultConfig()) - trustMetricStore.SetLogger(p2pLogger) - + var privatePeerIDs []string + if config.P2P.PrivatePeerIDs != "" { + privatePeerIDs = strings.Split(config.P2P.PrivatePeerIDs, ",") + } + // TODO persistent peers ? so we can have their DNS addrs saved pexReactor := pex.NewPEXReactor(addrBook, &pex.PEXReactorConfig{ Seeds: cmn.SplitAndTrim(config.P2P.Seeds, ",", " "), @@ -355,9 +368,8 @@ func NewNode(config *cfg.Config, genesisDoc: genDoc, privValidator: privValidator, - sw: sw, - addrBook: addrBook, - trustMetricStore: trustMetricStore, + sw: sw, + addrBook: addrBook, stateDB: stateDB, blockStore: blockStore, diff --git a/p2p/netaddress.go b/p2p/netaddress.go index 6b88f5670..6cf86f22e 100644 --- a/p2p/netaddress.go +++ b/p2p/netaddress.go @@ -22,6 +22,7 @@ type NetAddress struct { ID ID IP net.IP Port uint16 + Name string // optional DNS name str string } diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 70ae7ed11..7897a5534 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -20,7 +20,10 @@ const ( // PexChannel is a channel for PEX messages PexChannel = byte(0x00) - maxMsgSize = 1048576 // 1MB + // TODO: make smaller. Should match the maxGetSelection + // this is basically the amplification factor since a request + // msg is like 1 byte ... it can cause us to send msgs of this size! + maxPexMessageSize = 1048576 // 1MB // ensure we have enough peers defaultEnsurePeersPeriod = 30 * time.Second @@ -61,7 +64,7 @@ type PEXReactor struct { book AddrBook config *PEXReactorConfig - ensurePeersPeriod time.Duration + ensurePeersPeriod time.Duration // TODO: should go in the config // maps to prevent abuse requestsSent *cmn.CMap // ID->struct{}: unanswered send requests @@ -70,6 +73,12 @@ type PEXReactor struct { attemptsToDial sync.Map // address (string) -> {number of attempts (int), last time dialed (time.Time)} } +func (pexR *PEXReactor) minReceiveRequestInterval() time.Duration { + // NOTE: must be less than ensurePeersPeriod, otherwise we'll request + // peers too quickly from others and they'll think we're bad! + return pexR.ensurePeersPeriod / 3 +} + // PEXReactorConfig holds reactor specific configuration data. type PEXReactorConfig struct { // Seed/Crawler mode @@ -113,6 +122,9 @@ func (r *PEXReactor) OnStart() error { } // return err if user provided a bad seed address + // NOTE: only if its an invalid address. + // If we simply fail to resovle a DNS name, + // we shouldn't exit here ... if err := r.checkSeeds(); err != nil { return err } @@ -195,6 +207,10 @@ func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) { } // Seeds disconnect after sending a batch of addrs + // NOTE: this is a prime candidate for amplification attacks + // so it's important we + // 1) restrict how frequently peers can request + // 2) limit the output size if r.config.SeedMode { r.SendAddrs(src, r.book.GetSelectionWithBias(biasToSelectNewPeers)) r.Switch.StopPeerGracefully(src) @@ -213,6 +229,7 @@ func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) { } } +// enforces a minimum amount of time between requests func (r *PEXReactor) receiveRequest(src Peer) error { id := string(src.ID()) v := r.lastReceivedRequests.Get(id) @@ -232,8 +249,14 @@ func (r *PEXReactor) receiveRequest(src Peer) error { } now := time.Now() - if now.Sub(lastReceived) < r.ensurePeersPeriod/3 { - return fmt.Errorf("Peer (%v) is sending too many PEX requests. Disconnecting", src.ID()) + minInterval := r.minReceiveRequestInterval() + if now.Sub(lastReceived) < minInterval { + return fmt.Errorf("Peer (%v) send next PEX request too soon. lastReceived: %v, now: %v, minInterval: %v. Disconnecting", + src.ID(), + lastReceived, + now, + minInterval, + ) } r.lastReceivedRequests.Set(id, now) return nil @@ -264,7 +287,11 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { srcAddr := src.NodeInfo().NetAddress() for _, netAddr := range addrs { + // TODO: make sure correct nodes never send nil and return error + // if a netAddr == nil if netAddr != nil && !isAddrPrivate(netAddr, r.config.PrivatePeerIDs) { + // TODO: Should we moe the list of private peers into the AddrBook so AddAddress + // can do the check for us, and we don't have to worry about checking before calling ? err := r.book.AddAddress(netAddr, srcAddr) if err != nil { r.Logger.Error("Failed to add new address", "err", err) @@ -360,6 +387,9 @@ func (r *PEXReactor) ensurePeers() { if connected := r.Switch.Peers().Has(try.ID); connected { continue } + // TODO: consider moving some checks from toDial into here + // so we don't even consider dialing peers that we want to wait + // before dialling again, or have dialled too many times already r.Logger.Info("Will dial address", "addr", try) toDial[try.ID] = try } @@ -387,13 +417,17 @@ func (r *PEXReactor) ensurePeers() { } } -func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) { - var attempts int - var lastDialed time.Time - if lAttempts, attempted := r.attemptsToDial.Load(addr.DialString()); attempted { - attempts = lAttempts.(_attemptsToDial).number - lastDialed = lAttempts.(_attemptsToDial).lastDialed +func (r *PEXReactor) dialAttemptsInfo(addr *p2p.NetAddress) (attempts int, lastDialed time.Time) { + _attempts, ok := r.attemptsToDial.Load(addr.DialString()) + if !ok { + return } + atd := _attempts.(_attemptsToDial) + return atd.number, atd.lastDialed +} + +func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) { + attempts, lastDialed := r.dialAttemptsInfo(addr) if attempts > maxAttemptsToDial { r.Logger.Error("Reached max attempts to dial", "addr", addr, "attempts", attempts) @@ -439,6 +473,9 @@ func (r *PEXReactor) checkSeeds() error { if lSeeds == 0 { return nil } + // TODO: don't exit the program if we simply cant resolve a DNS name. + // But if names or addresses are incorrectly speficied (ie. invalid), + // then we should return an err that causes an exit _, errs := p2p.NewNetAddressStrings(r.config.Seeds) for _, err := range errs { if err != nil { diff --git a/p2p/switch.go b/p2p/switch.go index b47b967f9..598fac468 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -351,6 +351,7 @@ func (sw *Switch) IsDialing(id ID) bool { } // DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent). +// TODO: remove addrBook arg since it's now set on the switch func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent bool) error { netAddrs, errs := NewNetAddressStrings(peers) // only log errors, dial correct addresses @@ -360,7 +361,10 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b ourAddr := sw.nodeInfo.NetAddress() - // TODO: move this out of here ? + // TODO: this code feels like it's in the wrong place. + // The integration tests depend on the addrBook being saved + // right away but maybe we can change that. Recall that + // the addrBook is only written to disk every 2min if addrBook != nil { // add peers to `addrBook` for _, netAddr := range netAddrs { diff --git a/rpc/core/net.go b/rpc/core/net.go index 9b04926ab..d3123b9eb 100644 --- a/rpc/core/net.go +++ b/rpc/core/net.go @@ -47,6 +47,8 @@ func NetInfo() (*ctypes.ResultNetInfo, error) { ConnectionStatus: peer.Status(), }) } + // TODO: should we include "num_peers" field for convenience ? + // Let's also include the PersistentPeers and Seeds in here. return &ctypes.ResultNetInfo{ Listening: listening, Listeners: listeners,