Browse Source

some notes about the p2p layer

pull/1519/head
Ethan Buchman 7 years ago
parent
commit
936d1a0e68
6 changed files with 105 additions and 30 deletions
  1. +23
    -4
      docs/specification/new-spec/reactors/pex/pex.md
  2. +27
    -15
      node/node.go
  3. +1
    -0
      p2p/netaddress.go
  4. +47
    -10
      p2p/pex/pex_reactor.go
  5. +5
    -1
      p2p/switch.go
  6. +2
    -0
      rpc/core/net.go

+ 23
- 4
docs/specification/new-spec/reactors/pex/pex.md View File

@ -7,7 +7,8 @@ to good peers and to gossip peers to others.
## Peer Types
Certain peers are special in that they are specified by the user as `persistent`,
which means we auto-redial them if the connection fails.
which means we auto-redial them if the connection fails, or if we fail to dial
them.
Some peers can be marked as `private`, which means
we will not put them in the address book or gossip them to others.
@ -19,6 +20,11 @@ Peer discovery begins with a list of seeds.
When we have no peers, or have been unable to find enough peers from existing ones,
we dial a randomly selected seed to get a list of peers to dial.
On startup, we will also immediately dial the given list of `persisten_peers`,
and will attempt to maintain persistent connections with them. If the connections die,
we will redial every 5s for a a few minutes, then switch to an exponential backoff schedule,
and after about a day of trying, stop dialing the peer.
So long as we have less than `MaxPeers`, we periodically request additional peers
from each of our own. If sufficient time goes by and we still can't find enough peers,
we try the seeds again.
@ -68,6 +74,19 @@ Connection attempts are made with exponential backoff (plus jitter). Because
the selection process happens every `ensurePeersPeriod`, we might not end up
dialing a peer for much longer than the backoff duration.
.......
-----------------
TODO: put this in the right place. And add more details.
AddrBook: If there's no space in the book, we check the bucket for bad peers, which include peers we've attempted to
dial 3 times, and then remove them from only that bucket.
PEX: if we fail to conenct to the peer after 16 tries (with exponential backoff), we remove from address book completely.
-----------------
.......
## Select Peers to Exchange
When we’re asked for peers, we select them as follows:
@ -86,9 +105,9 @@ Note that the bad behaviour may be detected outside the PEX reactor itself
(for instance, in the mconnection, or another reactor), but it must be communicated to the PEX reactor
so it can remove and mark the peer.
In the PEX, if a peer sends us unsolicited lists of peers,
or if the peer sends too many requests for more peers in a given amount of time,
we Disconnect and Mark.
In the PEX, if a peer sends us an unsolicited list of peers,
or if the peer sends a request too soon after another one,
we Disconnect and MarkBad.
## Trust Metric


+ 27
- 15
node/node.go View File

@ -6,6 +6,7 @@ import (
"fmt"
"net"
"net/http"
"strings"
abci "github.com/tendermint/abci/types"
amino "github.com/tendermint/go-amino"
@ -21,7 +22,6 @@ import (
mempl "github.com/tendermint/tendermint/mempool"
"github.com/tendermint/tendermint/p2p"
"github.com/tendermint/tendermint/p2p/pex"
"github.com/tendermint/tendermint/p2p/trust"
"github.com/tendermint/tendermint/proxy"
rpccore "github.com/tendermint/tendermint/rpc/core"
ctypes "github.com/tendermint/tendermint/rpc/core/types"
@ -99,9 +99,9 @@ type Node struct {
privValidator types.PrivValidator // local node's validator key
// network
sw *p2p.Switch // p2p connections
addrBook pex.AddrBook // known peers
trustMetricStore *trust.TrustMetricStore // trust metrics for all peers
sw *p2p.Switch // p2p connections
addrBook pex.AddrBook // known peers
// trustMetricStore *trust.TrustMetricStore // trust metrics for all peers
// services
eventBus *types.EventBus // pub/sub for services
@ -262,20 +262,33 @@ func NewNode(config *cfg.Config,
sw.AddReactor("EVIDENCE", evidenceReactor)
// Optionally, start the pex reactor
//
// TODO:
//
// We need to set Seeds and PersistentPeers on the switch,
// since it needs to be able to use these (and their DNS names)
// even if the PEX is off. We can include the DNS name in the NetAddress,
// but it would still be nice to have a clear list of the current "PersistentPeers"
// somewhere that we can return with net_info.
//
// Let's assume we always have IDs ... and we just dont authenticate them
// if auth_enc=false.
//
// If PEX is on, it should handle dialing the seeds. Otherwise the switch does it.
var addrBook pex.AddrBook
var trustMetricStore *trust.TrustMetricStore
if config.P2P.PexReactor {
addrBook = pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict)
addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile()))
// Get the trust metric history data
trustHistoryDB, err := dbProvider(&DBContext{"trusthistory", config})
if err != nil {
return nil, err
var seeds []string
if config.P2P.Seeds != "" {
seeds = strings.Split(config.P2P.Seeds, ",")
}
trustMetricStore = trust.NewTrustMetricStore(trustHistoryDB, trust.DefaultConfig())
trustMetricStore.SetLogger(p2pLogger)
var privatePeerIDs []string
if config.P2P.PrivatePeerIDs != "" {
privatePeerIDs = strings.Split(config.P2P.PrivatePeerIDs, ",")
}
// TODO persistent peers ? so we can have their DNS addrs saved
pexReactor := pex.NewPEXReactor(addrBook,
&pex.PEXReactorConfig{
Seeds: cmn.SplitAndTrim(config.P2P.Seeds, ",", " "),
@ -355,9 +368,8 @@ func NewNode(config *cfg.Config,
genesisDoc: genDoc,
privValidator: privValidator,
sw: sw,
addrBook: addrBook,
trustMetricStore: trustMetricStore,
sw: sw,
addrBook: addrBook,
stateDB: stateDB,
blockStore: blockStore,


+ 1
- 0
p2p/netaddress.go View File

@ -22,6 +22,7 @@ type NetAddress struct {
ID ID
IP net.IP
Port uint16
Name string // optional DNS name
str string
}


+ 47
- 10
p2p/pex/pex_reactor.go View File

@ -20,7 +20,10 @@ const (
// PexChannel is a channel for PEX messages
PexChannel = byte(0x00)
maxMsgSize = 1048576 // 1MB
// TODO: make smaller. Should match the maxGetSelection
// this is basically the amplification factor since a request
// msg is like 1 byte ... it can cause us to send msgs of this size!
maxPexMessageSize = 1048576 // 1MB
// ensure we have enough peers
defaultEnsurePeersPeriod = 30 * time.Second
@ -61,7 +64,7 @@ type PEXReactor struct {
book AddrBook
config *PEXReactorConfig
ensurePeersPeriod time.Duration
ensurePeersPeriod time.Duration // TODO: should go in the config
// maps to prevent abuse
requestsSent *cmn.CMap // ID->struct{}: unanswered send requests
@ -70,6 +73,12 @@ type PEXReactor struct {
attemptsToDial sync.Map // address (string) -> {number of attempts (int), last time dialed (time.Time)}
}
func (pexR *PEXReactor) minReceiveRequestInterval() time.Duration {
// NOTE: must be less than ensurePeersPeriod, otherwise we'll request
// peers too quickly from others and they'll think we're bad!
return pexR.ensurePeersPeriod / 3
}
// PEXReactorConfig holds reactor specific configuration data.
type PEXReactorConfig struct {
// Seed/Crawler mode
@ -113,6 +122,9 @@ func (r *PEXReactor) OnStart() error {
}
// return err if user provided a bad seed address
// NOTE: only if its an invalid address.
// If we simply fail to resovle a DNS name,
// we shouldn't exit here ...
if err := r.checkSeeds(); err != nil {
return err
}
@ -195,6 +207,10 @@ func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) {
}
// Seeds disconnect after sending a batch of addrs
// NOTE: this is a prime candidate for amplification attacks
// so it's important we
// 1) restrict how frequently peers can request
// 2) limit the output size
if r.config.SeedMode {
r.SendAddrs(src, r.book.GetSelectionWithBias(biasToSelectNewPeers))
r.Switch.StopPeerGracefully(src)
@ -213,6 +229,7 @@ func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) {
}
}
// enforces a minimum amount of time between requests
func (r *PEXReactor) receiveRequest(src Peer) error {
id := string(src.ID())
v := r.lastReceivedRequests.Get(id)
@ -232,8 +249,14 @@ func (r *PEXReactor) receiveRequest(src Peer) error {
}
now := time.Now()
if now.Sub(lastReceived) < r.ensurePeersPeriod/3 {
return fmt.Errorf("Peer (%v) is sending too many PEX requests. Disconnecting", src.ID())
minInterval := r.minReceiveRequestInterval()
if now.Sub(lastReceived) < minInterval {
return fmt.Errorf("Peer (%v) send next PEX request too soon. lastReceived: %v, now: %v, minInterval: %v. Disconnecting",
src.ID(),
lastReceived,
now,
minInterval,
)
}
r.lastReceivedRequests.Set(id, now)
return nil
@ -264,7 +287,11 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error {
srcAddr := src.NodeInfo().NetAddress()
for _, netAddr := range addrs {
// TODO: make sure correct nodes never send nil and return error
// if a netAddr == nil
if netAddr != nil && !isAddrPrivate(netAddr, r.config.PrivatePeerIDs) {
// TODO: Should we moe the list of private peers into the AddrBook so AddAddress
// can do the check for us, and we don't have to worry about checking before calling ?
err := r.book.AddAddress(netAddr, srcAddr)
if err != nil {
r.Logger.Error("Failed to add new address", "err", err)
@ -360,6 +387,9 @@ func (r *PEXReactor) ensurePeers() {
if connected := r.Switch.Peers().Has(try.ID); connected {
continue
}
// TODO: consider moving some checks from toDial into here
// so we don't even consider dialing peers that we want to wait
// before dialling again, or have dialled too many times already
r.Logger.Info("Will dial address", "addr", try)
toDial[try.ID] = try
}
@ -387,13 +417,17 @@ func (r *PEXReactor) ensurePeers() {
}
}
func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) {
var attempts int
var lastDialed time.Time
if lAttempts, attempted := r.attemptsToDial.Load(addr.DialString()); attempted {
attempts = lAttempts.(_attemptsToDial).number
lastDialed = lAttempts.(_attemptsToDial).lastDialed
func (r *PEXReactor) dialAttemptsInfo(addr *p2p.NetAddress) (attempts int, lastDialed time.Time) {
_attempts, ok := r.attemptsToDial.Load(addr.DialString())
if !ok {
return
}
atd := _attempts.(_attemptsToDial)
return atd.number, atd.lastDialed
}
func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) {
attempts, lastDialed := r.dialAttemptsInfo(addr)
if attempts > maxAttemptsToDial {
r.Logger.Error("Reached max attempts to dial", "addr", addr, "attempts", attempts)
@ -439,6 +473,9 @@ func (r *PEXReactor) checkSeeds() error {
if lSeeds == 0 {
return nil
}
// TODO: don't exit the program if we simply cant resolve a DNS name.
// But if names or addresses are incorrectly speficied (ie. invalid),
// then we should return an err that causes an exit
_, errs := p2p.NewNetAddressStrings(r.config.Seeds)
for _, err := range errs {
if err != nil {


+ 5
- 1
p2p/switch.go View File

@ -351,6 +351,7 @@ func (sw *Switch) IsDialing(id ID) bool {
}
// DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent).
// TODO: remove addrBook arg since it's now set on the switch
func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent bool) error {
netAddrs, errs := NewNetAddressStrings(peers)
// only log errors, dial correct addresses
@ -360,7 +361,10 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b
ourAddr := sw.nodeInfo.NetAddress()
// TODO: move this out of here ?
// TODO: this code feels like it's in the wrong place.
// The integration tests depend on the addrBook being saved
// right away but maybe we can change that. Recall that
// the addrBook is only written to disk every 2min
if addrBook != nil {
// add peers to `addrBook`
for _, netAddr := range netAddrs {


+ 2
- 0
rpc/core/net.go View File

@ -47,6 +47,8 @@ func NetInfo() (*ctypes.ResultNetInfo, error) {
ConnectionStatus: peer.Status(),
})
}
// TODO: should we include "num_peers" field for convenience ?
// Let's also include the PersistentPeers and Seeds in here.
return &ctypes.ResultNetInfo{
Listening: listening,
Listeners: listeners,


Loading…
Cancel
Save