|
package p2p
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"math/rand"
|
|
"net"
|
|
"sync"
|
|
"time"
|
|
|
|
cfg "github.com/tendermint/tendermint/config"
|
|
"github.com/tendermint/tendermint/p2p/conn"
|
|
cmn "github.com/tendermint/tmlibs/common"
|
|
)
|
|
|
|
const (
|
|
// wait a random amount of time from this interval
|
|
// before dialing peers or reconnecting to help prevent DoS
|
|
dialRandomizerIntervalMilliseconds = 3000
|
|
|
|
// repeatedly try to reconnect for a few minutes
|
|
// ie. 5 * 20 = 100s
|
|
reconnectAttempts = 20
|
|
reconnectInterval = 5 * time.Second
|
|
|
|
// then move into exponential backoff mode for ~1day
|
|
// ie. 3**10 = 16hrs
|
|
reconnectBackOffAttempts = 10
|
|
reconnectBackOffBaseSeconds = 3
|
|
)
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
type AddrBook interface {
|
|
AddAddress(addr *NetAddress, src *NetAddress) error
|
|
MarkGood(*NetAddress)
|
|
Save()
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// `Switch` handles peer connections and exposes an API to receive incoming messages
|
|
// on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one
|
|
// or more `Channels`. So while sending outgoing messages is typically performed on the peer,
|
|
// incoming messages are received on the reactor.
|
|
type Switch struct {
|
|
cmn.BaseService
|
|
|
|
config *cfg.P2PConfig
|
|
peerConfig *PeerConfig
|
|
listeners []Listener
|
|
reactors map[string]Reactor
|
|
chDescs []*conn.ChannelDescriptor
|
|
reactorsByCh map[byte]Reactor
|
|
peers *PeerSet
|
|
dialing *cmn.CMap
|
|
nodeInfo NodeInfo // our node info
|
|
nodeKey *NodeKey // our node privkey
|
|
addrBook AddrBook
|
|
|
|
filterConnByAddr func(net.Addr) error
|
|
filterConnByID func(ID) error
|
|
|
|
rng *rand.Rand // seed for randomizing dial times and orders
|
|
}
|
|
|
|
func NewSwitch(config *cfg.P2PConfig) *Switch {
|
|
sw := &Switch{
|
|
config: config,
|
|
peerConfig: DefaultPeerConfig(),
|
|
reactors: make(map[string]Reactor),
|
|
chDescs: make([]*conn.ChannelDescriptor, 0),
|
|
reactorsByCh: make(map[byte]Reactor),
|
|
peers: NewPeerSet(),
|
|
dialing: cmn.NewCMap(),
|
|
}
|
|
|
|
// Ensure we have a completely undeterministic PRNG. cmd.RandInt64() draws
|
|
// from a seed that's initialized with OS entropy on process start.
|
|
sw.rng = rand.New(rand.NewSource(cmn.RandInt64()))
|
|
|
|
// TODO: collapse the peerConfig into the config ?
|
|
sw.peerConfig.MConfig.FlushThrottle = time.Duration(config.FlushThrottleTimeout) * time.Millisecond
|
|
sw.peerConfig.MConfig.SendRate = config.SendRate
|
|
sw.peerConfig.MConfig.RecvRate = config.RecvRate
|
|
sw.peerConfig.MConfig.MaxPacketMsgPayloadSize = config.MaxPacketMsgPayloadSize
|
|
sw.peerConfig.AuthEnc = config.AuthEnc
|
|
|
|
sw.BaseService = *cmn.NewBaseService(nil, "P2P Switch", sw)
|
|
return sw
|
|
}
|
|
|
|
//---------------------------------------------------------------------
|
|
// Switch setup
|
|
|
|
// AddReactor adds the given reactor to the switch.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
|
|
// Validate the reactor.
|
|
// No two reactors can share the same channel.
|
|
reactorChannels := reactor.GetChannels()
|
|
for _, chDesc := range reactorChannels {
|
|
chID := chDesc.ID
|
|
if sw.reactorsByCh[chID] != nil {
|
|
cmn.PanicSanity(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
|
|
}
|
|
sw.chDescs = append(sw.chDescs, chDesc)
|
|
sw.reactorsByCh[chID] = reactor
|
|
}
|
|
sw.reactors[name] = reactor
|
|
reactor.SetSwitch(sw)
|
|
return reactor
|
|
}
|
|
|
|
// Reactors returns a map of reactors registered on the switch.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) Reactors() map[string]Reactor {
|
|
return sw.reactors
|
|
}
|
|
|
|
// Reactor returns the reactor with the given name.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) Reactor(name string) Reactor {
|
|
return sw.reactors[name]
|
|
}
|
|
|
|
// AddListener adds the given listener to the switch for listening to incoming peer connections.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) AddListener(l Listener) {
|
|
sw.listeners = append(sw.listeners, l)
|
|
}
|
|
|
|
// Listeners returns the list of listeners the switch listens on.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) Listeners() []Listener {
|
|
return sw.listeners
|
|
}
|
|
|
|
// IsListening returns true if the switch has at least one listener.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) IsListening() bool {
|
|
return len(sw.listeners) > 0
|
|
}
|
|
|
|
// SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) SetNodeInfo(nodeInfo NodeInfo) {
|
|
sw.nodeInfo = nodeInfo
|
|
}
|
|
|
|
// NodeInfo returns the switch's NodeInfo.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) NodeInfo() NodeInfo {
|
|
return sw.nodeInfo
|
|
}
|
|
|
|
// SetNodeKey sets the switch's private key for authenticated encryption.
|
|
// NOTE: Not goroutine safe.
|
|
func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
|
|
sw.nodeKey = nodeKey
|
|
}
|
|
|
|
//---------------------------------------------------------------------
|
|
// Service start/stop
|
|
|
|
// OnStart implements BaseService. It starts all the reactors, peers, and listeners.
|
|
func (sw *Switch) OnStart() error {
|
|
// Start reactors
|
|
for _, reactor := range sw.reactors {
|
|
err := reactor.Start()
|
|
if err != nil {
|
|
return cmn.ErrorWrap(err, "failed to start %v", reactor)
|
|
}
|
|
}
|
|
// Start listeners
|
|
for _, listener := range sw.listeners {
|
|
go sw.listenerRoutine(listener)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// OnStop implements BaseService. It stops all listeners, peers, and reactors.
|
|
func (sw *Switch) OnStop() {
|
|
// Stop listeners
|
|
for _, listener := range sw.listeners {
|
|
listener.Stop()
|
|
}
|
|
sw.listeners = nil
|
|
// Stop peers
|
|
for _, peer := range sw.peers.List() {
|
|
peer.Stop()
|
|
sw.peers.Remove(peer)
|
|
}
|
|
// Stop reactors
|
|
sw.Logger.Debug("Switch: Stopping reactors")
|
|
for _, reactor := range sw.reactors {
|
|
reactor.Stop()
|
|
}
|
|
}
|
|
|
|
//---------------------------------------------------------------------
|
|
// Peers
|
|
|
|
// Broadcast runs a go routine for each attempted send, which will block trying
|
|
// to send for defaultSendTimeoutSeconds. Returns a channel which receives
|
|
// success values for each attempted send (false if times out). Channel will be
|
|
// closed once msg bytes are sent to all peers (or time out).
|
|
//
|
|
// NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
|
|
func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
|
|
successChan := make(chan bool, len(sw.peers.List()))
|
|
sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", fmt.Sprintf("%X", msgBytes))
|
|
var wg sync.WaitGroup
|
|
for _, peer := range sw.peers.List() {
|
|
wg.Add(1)
|
|
go func(peer Peer) {
|
|
defer wg.Done()
|
|
success := peer.Send(chID, msgBytes)
|
|
successChan <- success
|
|
}(peer)
|
|
}
|
|
go func() {
|
|
wg.Wait()
|
|
close(successChan)
|
|
}()
|
|
return successChan
|
|
}
|
|
|
|
// NumPeers returns the count of outbound/inbound and outbound-dialing peers.
|
|
func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
|
|
peers := sw.peers.List()
|
|
for _, peer := range peers {
|
|
if peer.IsOutbound() {
|
|
outbound++
|
|
} else {
|
|
inbound++
|
|
}
|
|
}
|
|
dialing = sw.dialing.Size()
|
|
return
|
|
}
|
|
|
|
// Peers returns the set of peers that are connected to the switch.
|
|
func (sw *Switch) Peers() IPeerSet {
|
|
return sw.peers
|
|
}
|
|
|
|
// StopPeerForError disconnects from a peer due to external error.
|
|
// If the peer is persistent, it will attempt to reconnect.
|
|
// TODO: make record depending on reason.
|
|
func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
|
|
sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
|
|
sw.stopAndRemovePeer(peer, reason)
|
|
|
|
if peer.IsPersistent() {
|
|
go sw.reconnectToPeer(peer)
|
|
}
|
|
}
|
|
|
|
// StopPeerGracefully disconnects from a peer gracefully.
|
|
// TODO: handle graceful disconnects.
|
|
func (sw *Switch) StopPeerGracefully(peer Peer) {
|
|
sw.Logger.Info("Stopping peer gracefully")
|
|
sw.stopAndRemovePeer(peer, nil)
|
|
}
|
|
|
|
func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
|
|
sw.peers.Remove(peer)
|
|
peer.Stop()
|
|
for _, reactor := range sw.reactors {
|
|
reactor.RemovePeer(peer, reason)
|
|
}
|
|
}
|
|
|
|
// reconnectToPeer tries to reconnect to the peer, first repeatedly
|
|
// with a fixed interval, then with exponential backoff.
|
|
// If no success after all that, it stops trying, and leaves it
|
|
// to the PEX/Addrbook to find the peer again
|
|
func (sw *Switch) reconnectToPeer(peer Peer) {
|
|
// NOTE this will connect to the self reported address,
|
|
// not necessarily the original we dialed
|
|
netAddr := peer.NodeInfo().NetAddress()
|
|
start := time.Now()
|
|
sw.Logger.Info("Reconnecting to peer", "peer", peer)
|
|
for i := 0; i < reconnectAttempts; i++ {
|
|
if !sw.IsRunning() {
|
|
return
|
|
}
|
|
|
|
err := sw.DialPeerWithAddress(netAddr, true)
|
|
if err != nil {
|
|
sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer)
|
|
// sleep a set amount
|
|
sw.randomSleep(reconnectInterval)
|
|
continue
|
|
} else {
|
|
return
|
|
}
|
|
}
|
|
|
|
sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
|
|
"peer", peer, "elapsed", time.Since(start))
|
|
for i := 0; i < reconnectBackOffAttempts; i++ {
|
|
if !sw.IsRunning() {
|
|
return
|
|
}
|
|
|
|
// sleep an exponentially increasing amount
|
|
sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
|
|
sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
|
|
err := sw.DialPeerWithAddress(netAddr, true)
|
|
if err == nil {
|
|
return // success
|
|
}
|
|
sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer)
|
|
}
|
|
sw.Logger.Error("Failed to reconnect to peer. Giving up", "peer", peer, "elapsed", time.Since(start))
|
|
}
|
|
|
|
// SetAddrBook allows to set address book on Switch.
|
|
func (sw *Switch) SetAddrBook(addrBook AddrBook) {
|
|
sw.addrBook = addrBook
|
|
}
|
|
|
|
// MarkPeerAsGood marks the given peer as good when it did something useful
|
|
// like contributed to consensus.
|
|
func (sw *Switch) MarkPeerAsGood(peer Peer) {
|
|
if sw.addrBook != nil {
|
|
sw.addrBook.MarkGood(peer.NodeInfo().NetAddress())
|
|
}
|
|
}
|
|
|
|
//---------------------------------------------------------------------
|
|
// Dialing
|
|
|
|
// IsDialing returns true if the switch is currently dialing the given ID.
|
|
func (sw *Switch) IsDialing(id ID) bool {
|
|
return sw.dialing.Has(string(id))
|
|
}
|
|
|
|
// DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent).
|
|
func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent bool) error {
|
|
netAddrs, errs := NewNetAddressStrings(peers)
|
|
for _, err := range errs {
|
|
sw.Logger.Error("Error in peer's address", "err", err)
|
|
}
|
|
|
|
if addrBook != nil {
|
|
// add peers to `addrBook`
|
|
ourAddr := sw.nodeInfo.NetAddress()
|
|
for _, netAddr := range netAddrs {
|
|
// do not add our address or ID
|
|
if netAddr.Same(ourAddr) {
|
|
continue
|
|
}
|
|
// TODO: move this out of here ?
|
|
addrBook.AddAddress(netAddr, ourAddr)
|
|
}
|
|
// Persist some peers to disk right away.
|
|
// NOTE: integration tests depend on this
|
|
addrBook.Save()
|
|
}
|
|
|
|
// permute the list, dial them in random order.
|
|
perm := sw.rng.Perm(len(netAddrs))
|
|
for i := 0; i < len(perm); i++ {
|
|
go func(i int) {
|
|
sw.randomSleep(0)
|
|
j := perm[i]
|
|
err := sw.DialPeerWithAddress(netAddrs[j], persistent)
|
|
if err != nil {
|
|
sw.Logger.Error("Error dialing peer", "err", err)
|
|
}
|
|
}(i)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects and authenticates successfully.
|
|
// If `persistent == true`, the switch will always try to reconnect to this peer if the connection ever fails.
|
|
func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) error {
|
|
sw.dialing.Set(string(addr.ID), addr)
|
|
defer sw.dialing.Delete(string(addr.ID))
|
|
return sw.addOutboundPeerWithConfig(addr, sw.peerConfig, persistent)
|
|
}
|
|
|
|
// sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
|
|
func (sw *Switch) randomSleep(interval time.Duration) {
|
|
r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
|
|
time.Sleep(r + interval)
|
|
}
|
|
|
|
//------------------------------------------------------------------------------------
|
|
// Connection filtering
|
|
|
|
// FilterConnByAddr returns an error if connecting to the given address is forbidden.
|
|
func (sw *Switch) FilterConnByAddr(addr net.Addr) error {
|
|
if sw.filterConnByAddr != nil {
|
|
return sw.filterConnByAddr(addr)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// FilterConnByID returns an error if connecting to the given peer ID is forbidden.
|
|
func (sw *Switch) FilterConnByID(id ID) error {
|
|
if sw.filterConnByID != nil {
|
|
return sw.filterConnByID(id)
|
|
}
|
|
return nil
|
|
|
|
}
|
|
|
|
// SetAddrFilter sets the function for filtering connections by address.
|
|
func (sw *Switch) SetAddrFilter(f func(net.Addr) error) {
|
|
sw.filterConnByAddr = f
|
|
}
|
|
|
|
// SetIDFilter sets the function for filtering connections by peer ID.
|
|
func (sw *Switch) SetIDFilter(f func(ID) error) {
|
|
sw.filterConnByID = f
|
|
}
|
|
|
|
//------------------------------------------------------------------------------------
|
|
|
|
func (sw *Switch) listenerRoutine(l Listener) {
|
|
for {
|
|
inConn, ok := <-l.Connections()
|
|
if !ok {
|
|
break
|
|
}
|
|
|
|
// ignore connection if we already have enough
|
|
maxPeers := sw.config.MaxNumPeers
|
|
if maxPeers <= sw.peers.Size() {
|
|
sw.Logger.Info("Ignoring inbound connection: already have enough peers", "address", inConn.RemoteAddr().String(), "numPeers", sw.peers.Size(), "max", maxPeers)
|
|
continue
|
|
}
|
|
|
|
// New inbound connection!
|
|
err := sw.addInboundPeerWithConfig(inConn, sw.peerConfig)
|
|
if err != nil {
|
|
sw.Logger.Info("Ignoring inbound connection: error while adding peer", "address", inConn.RemoteAddr().String(), "err", err)
|
|
continue
|
|
}
|
|
}
|
|
|
|
// cleanup
|
|
}
|
|
|
|
func (sw *Switch) addInboundPeerWithConfig(conn net.Conn, config *PeerConfig) error {
|
|
peerConn, err := newInboundPeerConn(conn, config, sw.nodeKey.PrivKey)
|
|
if err != nil {
|
|
conn.Close() // peer is nil
|
|
return err
|
|
}
|
|
if err = sw.addPeer(peerConn); err != nil {
|
|
peerConn.CloseConn()
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// dial the peer; make secret connection; authenticate against the dialed ID;
|
|
// add the peer.
|
|
func (sw *Switch) addOutboundPeerWithConfig(addr *NetAddress, config *PeerConfig, persistent bool) error {
|
|
sw.Logger.Info("Dialing peer", "address", addr)
|
|
peerConn, err := newOutboundPeerConn(addr, config, persistent, sw.nodeKey.PrivKey)
|
|
if err != nil {
|
|
sw.Logger.Error("Failed to dial peer", "address", addr, "err", err)
|
|
return err
|
|
}
|
|
|
|
if err := sw.addPeer(peerConn); err != nil {
|
|
sw.Logger.Error("Failed to add peer", "address", addr, "err", err)
|
|
peerConn.CloseConn()
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// addPeer performs the Tendermint P2P handshake with a peer
|
|
// that already has a SecretConnection. If all goes well,
|
|
// it starts the peer and adds it to the switch.
|
|
// NOTE: This performs a blocking handshake before the peer is added.
|
|
// NOTE: If error is returned, caller is responsible for calling peer.CloseConn()
|
|
func (sw *Switch) addPeer(pc peerConn) error {
|
|
|
|
addr := pc.conn.RemoteAddr()
|
|
if err := sw.FilterConnByAddr(addr); err != nil {
|
|
return err
|
|
}
|
|
|
|
// NOTE: if AuthEnc==false, we don't have a peerID until after the handshake.
|
|
// If AuthEnc==true then we already know the ID and could do the checks first before the handshake,
|
|
// but it's simple to just deal with both cases the same after the handshake.
|
|
|
|
// Exchange NodeInfo on the conn
|
|
peerNodeInfo, err := pc.HandshakeTimeout(sw.nodeInfo, time.Duration(sw.peerConfig.HandshakeTimeout*time.Second))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
peerID := peerNodeInfo.ID()
|
|
|
|
// ensure connection key matches self reported key
|
|
if pc.config.AuthEnc {
|
|
connID := pc.ID()
|
|
|
|
if peerID != connID {
|
|
return fmt.Errorf("nodeInfo.ID() (%v) doesn't match conn.ID() (%v)",
|
|
peerID, connID)
|
|
}
|
|
}
|
|
|
|
// Validate the peers nodeInfo
|
|
if err := peerNodeInfo.Validate(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Avoid self
|
|
if sw.nodeKey.ID() == peerID {
|
|
return ErrSwitchConnectToSelf
|
|
}
|
|
|
|
// Avoid duplicate
|
|
if sw.peers.Has(peerID) {
|
|
return ErrSwitchDuplicatePeer
|
|
}
|
|
|
|
// Filter peer against ID white list
|
|
if err := sw.FilterConnByID(peerID); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Check version, chain id
|
|
if err := sw.nodeInfo.CompatibleWith(peerNodeInfo); err != nil {
|
|
return err
|
|
}
|
|
|
|
peer := newPeer(pc, peerNodeInfo, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError)
|
|
peer.SetLogger(sw.Logger.With("peer", addr))
|
|
|
|
peer.Logger.Info("Successful handshake with peer", "peerNodeInfo", peerNodeInfo)
|
|
|
|
// All good. Start peer
|
|
if sw.IsRunning() {
|
|
sw.startInitPeer(peer)
|
|
}
|
|
|
|
// Add the peer to .peers.
|
|
// We start it first so that a peer in the list is safe to Stop.
|
|
// It should not err since we already checked peers.Has().
|
|
if err := sw.peers.Add(peer); err != nil {
|
|
return err
|
|
}
|
|
|
|
sw.Logger.Info("Added peer", "peer", peer)
|
|
return nil
|
|
}
|
|
|
|
func (sw *Switch) startInitPeer(peer *peer) {
|
|
err := peer.Start() // spawn send/recv routines
|
|
if err != nil {
|
|
// Should never happen
|
|
sw.Logger.Error("Error starting peer", "peer", peer, "err", err)
|
|
}
|
|
|
|
for _, reactor := range sw.reactors {
|
|
reactor.AddPeer(peer)
|
|
}
|
|
}
|