package p2p

import (
	"fmt"
	"math"
	"math/rand"
	"net"
	"time"

	"github.com/pkg/errors"
	crypto "github.com/tendermint/go-crypto"
	cfg "github.com/tendermint/tendermint/config"
	cmn "github.com/tendermint/tmlibs/common"
)

const (
	// wait a random amount of time from this interval
	// before dialing seeds or reconnecting to help prevent DoS
	dialRandomizerIntervalMilliseconds = 3000

	// repeatedly try to reconnect for a few minutes
	// ie. 5 * 20 = 100s
	reconnectAttempts = 20
	reconnectInterval = 5 * time.Second

	// then move into exponential backoff mode for ~1day
	// ie. 3**10 = 16hrs
	reconnectBackOffAttempts    = 10
	reconnectBackOffBaseSeconds = 3
)

type Reactor interface {
	cmn.Service // Start, Stop

	SetSwitch(*Switch)
	GetChannels() []*ChannelDescriptor
	AddPeer(peer Peer)
	RemovePeer(peer Peer, reason interface{})
	Receive(chID byte, peer Peer, msgBytes []byte) // CONTRACT: msgBytes are not nil
}

//--------------------------------------

type BaseReactor struct {
	cmn.BaseService // Provides Start, Stop, .Quit
	Switch          *Switch
}

func NewBaseReactor(name string, impl Reactor) *BaseReactor {
	return &BaseReactor{
		BaseService: *cmn.NewBaseService(nil, name, impl),
		Switch:      nil,
	}
}

func (br *BaseReactor) SetSwitch(sw *Switch) {
	br.Switch = sw
}
func (_ *BaseReactor) GetChannels() []*ChannelDescriptor             { return nil }
func (_ *BaseReactor) AddPeer(peer Peer)                             {}
func (_ *BaseReactor) RemovePeer(peer Peer, reason interface{})      {}
func (_ *BaseReactor) Receive(chID byte, peer Peer, msgBytes []byte) {}
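
// A minimal reactor sketch, embedding BaseReactor for the default no-op
// methods. The reactor name, channel ID, and echo behavior below are
// illustrative, not taken from this package:
//
//	type EchoReactor struct {
//		BaseReactor
//	}
//
//	func NewEchoReactor() *EchoReactor {
//		r := &EchoReactor{}
//		r.BaseReactor = *NewBaseReactor("EchoReactor", r)
//		return r
//	}
//
//	// Claim one channel; the Switch routes messages on this ID to us.
//	func (r *EchoReactor) GetChannels() []*ChannelDescriptor {
//		return []*ChannelDescriptor{{ID: 0x40}}
//	}
//
//	// Echo every received message straight back on the same channel.
//	func (r *EchoReactor) Receive(chID byte, peer Peer, msgBytes []byte) {
//		peer.Send(chID, msgBytes)
//	}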

//-----------------------------------------------------------------------------

/*
The `Switch` handles peer connections and exposes an API to receive incoming messages
on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one
or more `Channels`. So while sending outgoing messages is typically performed on the peer,
incoming messages are received on the reactor.
*/
type Switch struct {
	cmn.BaseService

	config       *cfg.P2PConfig
	peerConfig   *PeerConfig
	listeners    []Listener
	reactors     map[string]Reactor
	chDescs      []*ChannelDescriptor
	reactorsByCh map[byte]Reactor
	peers        *PeerSet
	dialing      *cmn.CMap
	nodeInfo     *NodeInfo      // our node info
	nodePrivKey  crypto.PrivKey // our node privkey

	filterConnByAddr   func(net.Addr) error
	filterConnByPubKey func(crypto.PubKey) error

	rng *rand.Rand // used for randomizing dial times and orders
}

var (
	ErrSwitchDuplicatePeer = errors.New("Duplicate peer")
)

func NewSwitch(config *cfg.P2PConfig) *Switch {
	sw := &Switch{
		config:       config,
		peerConfig:   DefaultPeerConfig(),
		reactors:     make(map[string]Reactor),
		chDescs:      make([]*ChannelDescriptor, 0),
		reactorsByCh: make(map[byte]Reactor),
		peers:        NewPeerSet(),
		dialing:      cmn.NewCMap(),
		nodeInfo:     nil,
	}

	// Ensure we have a completely non-deterministic PRNG. cmn.RandInt64() draws
	// from a seed that's initialized with OS entropy on process start.
	sw.rng = rand.New(rand.NewSource(cmn.RandInt64()))

	// TODO: collapse the peerConfig into the config?
	sw.peerConfig.MConfig.flushThrottle = time.Duration(config.FlushThrottleTimeout) * time.Millisecond
	sw.peerConfig.MConfig.SendRate = config.SendRate
	sw.peerConfig.MConfig.RecvRate = config.RecvRate
	sw.peerConfig.MConfig.maxMsgPacketPayloadSize = config.MaxMsgPacketPayloadSize

	sw.BaseService = *cmn.NewBaseService(nil, "P2P Switch", sw)
	return sw
}
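
// A typical wiring sketch. The reactor, listener, and config values here are
// assumptions for illustration; see SetNodeInfo/SetNodePrivKey below for the
// identity setup:
//
//	sw := NewSwitch(p2pConfig)         // p2pConfig: a caller-provided *cfg.P2PConfig
//	sw.AddReactor("ECHO", echoReactor) // register reactors before starting
//	sw.AddListener(listener)           // listener: a caller-provided Listener
//	if err := sw.Start(); err != nil {
//		// handle startup error
//	}
//	defer sw.Stop()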

// AddReactor adds the given reactor to the switch.
// NOTE: Not goroutine safe.
func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
	// Validate the reactor.
	// No two reactors can share the same channel.
	reactorChannels := reactor.GetChannels()
	for _, chDesc := range reactorChannels {
		chID := chDesc.ID
		if sw.reactorsByCh[chID] != nil {
			cmn.PanicSanity(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
		}
		sw.chDescs = append(sw.chDescs, chDesc)
		sw.reactorsByCh[chID] = reactor
	}
	sw.reactors[name] = reactor
	reactor.SetSwitch(sw)
	return reactor
}

// Reactors returns a map of reactors registered on the switch.
// NOTE: Not goroutine safe.
func (sw *Switch) Reactors() map[string]Reactor {
	return sw.reactors
}

// Reactor returns the reactor with the given name.
// NOTE: Not goroutine safe.
func (sw *Switch) Reactor(name string) Reactor {
	return sw.reactors[name]
}

// AddListener adds the given listener to the switch for listening to incoming peer connections.
// NOTE: Not goroutine safe.
func (sw *Switch) AddListener(l Listener) {
	sw.listeners = append(sw.listeners, l)
}

// Listeners returns the list of listeners the switch listens on.
// NOTE: Not goroutine safe.
func (sw *Switch) Listeners() []Listener {
	return sw.listeners
}

// IsListening returns true if the switch has at least one listener.
// NOTE: Not goroutine safe.
func (sw *Switch) IsListening() bool {
	return len(sw.listeners) > 0
}

// SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
// NOTE: Not goroutine safe.
func (sw *Switch) SetNodeInfo(nodeInfo *NodeInfo) {
	sw.nodeInfo = nodeInfo
}

// NodeInfo returns the switch's NodeInfo.
// NOTE: Not goroutine safe.
func (sw *Switch) NodeInfo() *NodeInfo {
	return sw.nodeInfo
}

// SetNodePrivKey sets the switch's private key for authenticated encryption.
// NOTE: Overwrites sw.nodeInfo.PubKey.
// NOTE: Not goroutine safe.
func (sw *Switch) SetNodePrivKey(nodePrivKey crypto.PrivKey) {
	sw.nodePrivKey = nodePrivKey
	if sw.nodeInfo != nil {
		sw.nodeInfo.PubKey = nodePrivKey.PubKey()
	}
}
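
// Identity setup sketch: set the NodeInfo first, then the private key, since
// SetNodePrivKey overwrites nodeInfo.PubKey when nodeInfo is already set. The
// moniker, network, version, and address are illustrative; makeSwitch at the
// bottom of this file follows the same pattern:
//
//	privKey := crypto.GenPrivKeyEd25519()
//	sw.SetNodeInfo(&NodeInfo{
//		Moniker:    "my-node",
//		Network:    "my-chain",
//		Version:    "0.1.0",
//		ListenAddr: "0.0.0.0:46656",
//	})
//	sw.SetNodePrivKey(privKey.Wrap())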

// OnStart implements BaseService. It starts all the reactors, peers, and listeners.
func (sw *Switch) OnStart() error {
	// Start reactors
	for _, reactor := range sw.reactors {
		err := reactor.Start()
		if err != nil {
			return errors.Wrapf(err, "failed to start %v", reactor)
		}
	}
	// Start listeners
	for _, listener := range sw.listeners {
		go sw.listenerRoutine(listener)
	}
	return nil
}

// OnStop implements BaseService. It stops all listeners, peers, and reactors.
func (sw *Switch) OnStop() {
	// Stop listeners
	for _, listener := range sw.listeners {
		listener.Stop()
	}
	sw.listeners = nil
	// Stop peers
	for _, peer := range sw.peers.List() {
		peer.Stop()
		sw.peers.Remove(peer)
	}
	// Stop reactors
	sw.Logger.Debug("Switch: Stopping reactors")
	for _, reactor := range sw.reactors {
		reactor.Stop()
	}
}

// addPeer checks the given peer's validity, performs a handshake, and adds the
// peer to the switch and to all registered reactors.
// NOTE: This performs a blocking handshake before the peer is added.
// NOTE: If error is returned, caller is responsible for calling peer.CloseConn()
func (sw *Switch) addPeer(peer *peer) error {
	if err := sw.FilterConnByAddr(peer.Addr()); err != nil {
		return err
	}

	if err := sw.FilterConnByPubKey(peer.PubKey()); err != nil {
		return err
	}

	if err := peer.HandshakeTimeout(sw.nodeInfo, time.Duration(sw.peerConfig.HandshakeTimeout*time.Second)); err != nil {
		return err
	}

	// Avoid self
	if sw.nodeInfo.PubKey.Equals(peer.PubKey().Wrap()) {
		return errors.New("Ignoring connection from self")
	}

	// Check version, chain id
	if err := sw.nodeInfo.CompatibleWith(peer.NodeInfo()); err != nil {
		return err
	}

	// Check for duplicate peer
	if sw.peers.Has(peer.Key()) {
		return ErrSwitchDuplicatePeer
	}

	// Start peer
	if sw.IsRunning() {
		sw.startInitPeer(peer)
	}

	// Add the peer to .peers.
	// We start it first so that a peer in the list is safe to Stop.
	// It should not err since we already checked peers.Has().
	if err := sw.peers.Add(peer); err != nil {
		return err
	}

	sw.Logger.Info("Added peer", "peer", peer)
	return nil
}

// FilterConnByAddr returns an error if connecting to the given address is forbidden.
func (sw *Switch) FilterConnByAddr(addr net.Addr) error {
	if sw.filterConnByAddr != nil {
		return sw.filterConnByAddr(addr)
	}
	return nil
}

// FilterConnByPubKey returns an error if connecting to the given public key is forbidden.
func (sw *Switch) FilterConnByPubKey(pubkey crypto.PubKey) error {
	if sw.filterConnByPubKey != nil {
		return sw.filterConnByPubKey(pubkey)
	}
	return nil
}

// SetAddrFilter sets the function for filtering connections by address.
func (sw *Switch) SetAddrFilter(f func(net.Addr) error) {
	sw.filterConnByAddr = f
}

// SetPubKeyFilter sets the function for filtering connections by public key.
func (sw *Switch) SetPubKeyFilter(f func(crypto.PubKey) error) {
	sw.filterConnByPubKey = f
}
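
// Filter sketch: reject inbound and outbound connections from a
// caller-maintained set of banned IPs. The bannedIPs map and the error text
// are illustrative:
//
//	bannedIPs := map[string]bool{"10.0.0.1": true}
//	sw.SetAddrFilter(func(addr net.Addr) error {
//		host, _, err := net.SplitHostPort(addr.String())
//		if err != nil {
//			return err
//		}
//		if bannedIPs[host] {
//			return fmt.Errorf("address %s is banned", host)
//		}
//		return nil
//	})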

func (sw *Switch) startInitPeer(peer *peer) {
	err := peer.Start() // spawn send/recv routines
	if err != nil {
		// Should never happen
		sw.Logger.Error("Error starting peer", "peer", peer, "err", err)
	}

	for _, reactor := range sw.reactors {
		reactor.AddPeer(peer)
	}
}

// DialSeeds dials a list of seeds asynchronously in random order.
func (sw *Switch) DialSeeds(addrBook *AddrBook, seeds []string) error {
	netAddrs, errs := NewNetAddressStrings(seeds)
	for _, err := range errs {
		sw.Logger.Error("Error in seed's address", "err", err)
	}

	if addrBook != nil {
		// add seeds to `addrBook`
		ourAddrS := sw.nodeInfo.ListenAddr
		ourAddr, _ := NewNetAddressString(ourAddrS)
		for _, netAddr := range netAddrs {
			// do not add ourselves
			if netAddr.Equals(ourAddr) {
				continue
			}
			addrBook.AddAddress(netAddr, ourAddr)
		}
		addrBook.Save()
	}

	// permute the list, dial them in random order.
	perm := sw.rng.Perm(len(netAddrs))
	for i := 0; i < len(perm); i++ {
		go func(i int) {
			sw.randomSleep(0)
			j := perm[i]
			sw.dialSeed(netAddrs[j])
		}(i)
	}
	return nil
}
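
// Dialing sketch: seeds are address strings as accepted by
// NewNetAddressStrings (assumed here to be "host:port"); passing a nil address
// book skips persisting them. The addresses below are illustrative:
//
//	seeds := []string{"10.0.0.2:46656", "10.0.0.3:46656"}
//	if err := sw.DialSeeds(nil, seeds); err != nil {
//		// handle error
//	}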

// sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
func (sw *Switch) randomSleep(interval time.Duration) {
	r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
	time.Sleep(r + interval)
}

func (sw *Switch) dialSeed(addr *NetAddress) {
	peer, err := sw.DialPeerWithAddress(addr, true)
	if err != nil {
		sw.Logger.Error("Error dialing seed", "err", err)
	} else {
		sw.Logger.Info("Connected to seed", "peer", peer)
	}
}

// DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects successfully.
// If `persistent == true`, the switch will always try to reconnect to this peer if the connection ever fails.
func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) (Peer, error) {
	sw.dialing.Set(addr.IP.String(), addr)
	defer sw.dialing.Delete(addr.IP.String())

	sw.Logger.Info("Dialing peer", "address", addr)
	peer, err := newOutboundPeer(addr, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, sw.peerConfig)
	if err != nil {
		sw.Logger.Error("Failed to dial peer", "address", addr, "err", err)
		return nil, err
	}
	peer.SetLogger(sw.Logger.With("peer", addr))
	if persistent {
		peer.makePersistent()
	}
	err = sw.addPeer(peer)
	if err != nil {
		sw.Logger.Error("Failed to add peer", "address", addr, "err", err)
		peer.CloseConn()
		return nil, err
	}
	sw.Logger.Info("Dialed and added peer", "address", addr, "peer", peer)
	return peer, nil
}
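
// Persistent-dial sketch: dial one known address and let the switch keep
// reconnecting if the connection ever drops (the address is illustrative):
//
//	addr, err := NewNetAddressString("10.0.0.4:46656")
//	if err != nil {
//		// handle bad address
//	}
//	peer, err := sw.DialPeerWithAddress(addr, true) // persistent == true
//	if err != nil {
//		// handle dial/handshake failure
//	}
//	_ = peer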

// IsDialing returns true if the switch is currently dialing the given address.
func (sw *Switch) IsDialing(addr *NetAddress) bool {
	return sw.dialing.Has(addr.IP.String())
}

// Broadcast runs a goroutine for each attempted send, which will block
// trying to send for defaultSendTimeoutSeconds. Returns a channel
// which receives success values for each attempted send (false if it times out).
// NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
// TODO: Something more intelligent.
func (sw *Switch) Broadcast(chID byte, msg interface{}) chan bool {
	successChan := make(chan bool, len(sw.peers.List()))
	sw.Logger.Debug("Broadcast", "channel", chID, "msg", msg)
	for _, peer := range sw.peers.List() {
		go func(peer Peer) {
			success := peer.Send(chID, msg)
			successChan <- success
		}(peer)
	}
	return successChan
}
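
// Broadcast sketch: the returned channel is buffered to the number of peers at
// call time, so it can be drained to count successful sends. The channel ID
// and message are illustrative, and the peer count can race with peers being
// added or removed between the two calls:
//
//	outbound, inbound, _ := sw.NumPeers()
//	results := sw.Broadcast(0x40, msgBytes)
//	succeeded := 0
//	for i := 0; i < outbound+inbound; i++ {
//		if <-results {
//			succeeded++
//		}
//	}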

// NumPeers returns the count of outbound/inbound and outbound-dialing peers.
func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
	peers := sw.peers.List()
	for _, peer := range peers {
		if peer.IsOutbound() {
			outbound++
		} else {
			inbound++
		}
	}
	dialing = sw.dialing.Size()
	return
}

// Peers returns the set of peers that are connected to the switch.
func (sw *Switch) Peers() IPeerSet {
	return sw.peers
}

// StopPeerForError disconnects from a peer due to external error.
// If the peer is persistent, it will attempt to reconnect.
// TODO: make record depending on reason.
func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
	sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
	sw.stopAndRemovePeer(peer, reason)

	if peer.IsPersistent() {
		go sw.reconnectToPeer(peer)
	}
}

// reconnectToPeer tries to reconnect to the peer, first repeatedly
// with a fixed interval, then with exponential backoff.
// If it still fails after all of that, it stops trying and leaves it
// to the PEX/AddrBook to find the peer again.
func (sw *Switch) reconnectToPeer(peer Peer) {
	addr, _ := NewNetAddressString(peer.NodeInfo().RemoteAddr)
	start := time.Now()
	sw.Logger.Info("Reconnecting to peer", "peer", peer)
	for i := 0; i < reconnectAttempts; i++ {
		if !sw.IsRunning() {
			return
		}

		peer, err := sw.DialPeerWithAddress(addr, true)
		if err != nil {
			sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer)
			// sleep a set amount
			sw.randomSleep(reconnectInterval)
			continue
		} else {
			sw.Logger.Info("Reconnected to peer", "peer", peer)
			return
		}
	}

	sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
		"peer", peer, "elapsed", time.Since(start))
	for i := 0; i < reconnectBackOffAttempts; i++ {
		if !sw.IsRunning() {
			return
		}

		// sleep an exponentially increasing amount
		sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
		sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
		peer, err := sw.DialPeerWithAddress(addr, true)
		if err != nil {
			sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer)
			continue
		} else {
			sw.Logger.Info("Reconnected to peer", "peer", peer)
			return
		}
	}
	sw.Logger.Error("Failed to reconnect to peer. Giving up", "peer", peer, "elapsed", time.Since(start))
}

// StopPeerGracefully disconnects from a peer gracefully.
// TODO: handle graceful disconnects.
func (sw *Switch) StopPeerGracefully(peer Peer) {
	sw.Logger.Info("Stopping peer gracefully")
	sw.stopAndRemovePeer(peer, nil)
}

func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
	sw.peers.Remove(peer)
	peer.Stop()
	for _, reactor := range sw.reactors {
		reactor.RemovePeer(peer, reason)
	}
}

func (sw *Switch) listenerRoutine(l Listener) {
	for {
		inConn, ok := <-l.Connections()
		if !ok {
			break
		}

		// ignore connection if we already have enough
		maxPeers := sw.config.MaxNumPeers
		if maxPeers <= sw.peers.Size() {
			sw.Logger.Info("Ignoring inbound connection: already have enough peers", "address", inConn.RemoteAddr().String(), "numPeers", sw.peers.Size(), "max", maxPeers)
			continue
		}

		// New inbound connection!
		err := sw.addPeerWithConnectionAndConfig(inConn, sw.peerConfig)
		if err != nil {
			sw.Logger.Info("Ignoring inbound connection: error while adding peer", "address", inConn.RemoteAddr().String(), "err", err)
			continue
		}

		// NOTE: We don't yet have the listening port of the
		// remote (if they have a listener at all).
		// The peerHandshake will handle that.
	}

	// cleanup
}

//------------------------------------------------------------------
// Connects switches via arbitrary net.Conn. Used for testing.

// MakeConnectedSwitches returns n switches, connected according to the connect func.
// If connect==Connect2Switches, the switches will be fully connected.
// initSwitch defines how the i'th switch should be initialized (ie. with what reactors).
// NOTE: panics if any switch fails to start.
func MakeConnectedSwitches(cfg *cfg.P2PConfig, n int, initSwitch func(int, *Switch) *Switch, connect func([]*Switch, int, int)) []*Switch {
	switches := make([]*Switch, n)
	for i := 0; i < n; i++ {
		switches[i] = makeSwitch(cfg, i, "testing", "123.123.123", initSwitch)
	}

	if err := StartSwitches(switches); err != nil {
		panic(err)
	}

	for i := 0; i < n; i++ {
		for j := i + 1; j < n; j++ {
			connect(switches, i, j)
		}
	}

	return switches
}
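
// Test-helper sketch: build a small, fully connected network of switches that
// all run the same reactor. The config and the EchoReactor from the sketch
// near the top of this file are illustrative:
//
//	switches := MakeConnectedSwitches(p2pConfig, 3, func(i int, sw *Switch) *Switch {
//		sw.AddReactor("ECHO", NewEchoReactor()) // any Reactor works here
//		return sw
//	}, Connect2Switches)
//	defer func() {
//		for _, sw := range switches {
//			sw.Stop()
//		}
//	}()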

// Connect2Switches will connect switches i and j via net.Pipe().
// Blocks until a connection is established.
// NOTE: caller ensures i and j are within bounds.
func Connect2Switches(switches []*Switch, i, j int) {
	switchI := switches[i]
	switchJ := switches[j]
	c1, c2 := netPipe()
	doneCh := make(chan struct{})
	go func() {
		err := switchI.addPeerWithConnection(c1)
		if err != nil {
			panic(err)
		}
		doneCh <- struct{}{}
	}()
	go func() {
		err := switchJ.addPeerWithConnection(c2)
		if err != nil {
			panic(err)
		}
		doneCh <- struct{}{}
	}()
	<-doneCh
	<-doneCh
}

// StartSwitches calls sw.Start() for each given switch.
// It returns the first encountered error.
func StartSwitches(switches []*Switch) error {
	for _, s := range switches {
		err := s.Start() // start switch and reactors
		if err != nil {
			return err
		}
	}
	return nil
}

func makeSwitch(cfg *cfg.P2PConfig, i int, network, version string, initSwitch func(int, *Switch) *Switch) *Switch {
	privKey := crypto.GenPrivKeyEd25519()
	// new switch, add reactors
	// TODO: let the config be passed in?
	s := initSwitch(i, NewSwitch(cfg))
	s.SetNodeInfo(&NodeInfo{
		PubKey:     privKey.PubKey(),
		Moniker:    cmn.Fmt("switch%d", i),
		Network:    network,
		Version:    version,
		RemoteAddr: cmn.Fmt("%v:%v", network, rand.Intn(64512)+1023),
		ListenAddr: cmn.Fmt("%v:%v", network, rand.Intn(64512)+1023),
	})
	s.SetNodePrivKey(privKey.Wrap())
	return s
}

func (sw *Switch) addPeerWithConnection(conn net.Conn) error {
	peer, err := newInboundPeer(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, sw.peerConfig)
	if err != nil {
		if err := conn.Close(); err != nil {
			sw.Logger.Error("Error closing connection", "err", err)
		}
		return err
	}
	peer.SetLogger(sw.Logger.With("peer", conn.RemoteAddr()))
	if err = sw.addPeer(peer); err != nil {
		peer.CloseConn()
		return err
	}

	return nil
}

func (sw *Switch) addPeerWithConnectionAndConfig(conn net.Conn, config *PeerConfig) error {
	peer, err := newInboundPeer(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, config)
	if err != nil {
		if err := conn.Close(); err != nil {
			sw.Logger.Error("Error closing connection", "err", err)
		}
		return err
	}
	peer.SetLogger(sw.Logger.With("peer", conn.RemoteAddr()))
	if err = sw.addPeer(peer); err != nil {
		peer.CloseConn()
		return err
	}

	return nil
}