You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

541 lines
16 KiB

9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
8 years ago
9 years ago
7 years ago
7 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
7 years ago
7 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
7 years ago
7 years ago
9 years ago
7 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
7 years ago
7 years ago
9 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
9 years ago
7 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
8 years ago
9 years ago
7 years ago
9 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
8 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
8 years ago
7 years ago
8 years ago
7 years ago
8 years ago
7 years ago
8 years ago
  1. package p2p
  2. import (
  3. "fmt"
  4. "math"
  5. "math/rand"
  6. "net"
  7. "time"
  8. "github.com/pkg/errors"
  9. crypto "github.com/tendermint/go-crypto"
  10. cfg "github.com/tendermint/tendermint/config"
  11. cmn "github.com/tendermint/tmlibs/common"
  12. )
  // Tunables for dialing and reconnecting to peers.
  const (
  	// dialRandomizerIntervalMilliseconds bounds the random delay (in
  	// milliseconds) that randomSleep adds before dialing or reconnecting.
  	// Spreading the attempts out helps prevent DoS.
  	dialRandomizerIntervalMilliseconds = 3000

  	// Phase 1 of reconnectToPeer: retry at a fixed interval.
  	// 20 attempts * 5s = 100s, plus the random delay added on each attempt.
  	reconnectAttempts = 20
  	reconnectInterval = 5 * time.Second

  	// Phase 2 of reconnectToPeer: exponential backoff.
  	// Attempt i (i = 0..9) sleeps 3^i seconds before dialing, so the longest
  	// single sleep is 3^9s (~5.5hrs) and all ten sleeps add up to ~8.2hrs.
  	reconnectBackOffAttempts    = 10
  	reconnectBackOffBaseSeconds = 3
  )
  26. var (
  27. ErrSwitchDuplicatePeer = errors.New("Duplicate peer")
  28. ErrSwitchConnectToSelf = errors.New("Connect to self")
  29. )
  30. //-----------------------------------------------------------------------------
  31. // `Switch` handles peer connections and exposes an API to receive incoming messages
  32. // on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one
  33. // or more `Channels`. So while sending outgoing messages is typically performed on the peer,
  34. // incoming messages are received on the reactor.
  35. type Switch struct {
  36. cmn.BaseService
  37. config *cfg.P2PConfig
  38. peerConfig *PeerConfig
  39. listeners []Listener
  40. reactors map[string]Reactor
  41. chDescs []*ChannelDescriptor
  42. reactorsByCh map[byte]Reactor
  43. peers *PeerSet
  44. dialing *cmn.CMap
  45. nodeInfo *NodeInfo // our node info
  46. nodeKey *NodeKey // our node privkey
  47. filterConnByAddr func(net.Addr) error
  48. filterConnByPubKey func(crypto.PubKey) error
  49. rng *rand.Rand // seed for randomizing dial times and orders
  50. }
  51. func NewSwitch(config *cfg.P2PConfig) *Switch {
  52. sw := &Switch{
  53. config: config,
  54. peerConfig: DefaultPeerConfig(),
  55. reactors: make(map[string]Reactor),
  56. chDescs: make([]*ChannelDescriptor, 0),
  57. reactorsByCh: make(map[byte]Reactor),
  58. peers: NewPeerSet(),
  59. dialing: cmn.NewCMap(),
  60. nodeInfo: nil,
  61. }
  62. // Ensure we have a completely undeterministic PRNG. cmd.RandInt64() draws
  63. // from a seed that's initialized with OS entropy on process start.
  64. sw.rng = rand.New(rand.NewSource(cmn.RandInt64()))
  65. // TODO: collapse the peerConfig into the config ?
  66. sw.peerConfig.MConfig.flushThrottle = time.Duration(config.FlushThrottleTimeout) * time.Millisecond
  67. sw.peerConfig.MConfig.SendRate = config.SendRate
  68. sw.peerConfig.MConfig.RecvRate = config.RecvRate
  69. sw.peerConfig.MConfig.maxMsgPacketPayloadSize = config.MaxMsgPacketPayloadSize
  70. sw.BaseService = *cmn.NewBaseService(nil, "P2P Switch", sw)
  71. return sw
  72. }
  73. //---------------------------------------------------------------------
  74. // Switch setup
  75. // AddReactor adds the given reactor to the switch.
  76. // NOTE: Not goroutine safe.
  77. func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
  78. // Validate the reactor.
  79. // No two reactors can share the same channel.
  80. reactorChannels := reactor.GetChannels()
  81. for _, chDesc := range reactorChannels {
  82. chID := chDesc.ID
  83. if sw.reactorsByCh[chID] != nil {
  84. cmn.PanicSanity(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
  85. }
  86. sw.chDescs = append(sw.chDescs, chDesc)
  87. sw.reactorsByCh[chID] = reactor
  88. }
  89. sw.reactors[name] = reactor
  90. reactor.SetSwitch(sw)
  91. return reactor
  92. }
  93. // Reactors returns a map of reactors registered on the switch.
  94. // NOTE: Not goroutine safe.
  95. func (sw *Switch) Reactors() map[string]Reactor {
  96. return sw.reactors
  97. }
  98. // Reactor returns the reactor with the given name.
  99. // NOTE: Not goroutine safe.
  100. func (sw *Switch) Reactor(name string) Reactor {
  101. return sw.reactors[name]
  102. }
  103. // AddListener adds the given listener to the switch for listening to incoming peer connections.
  104. // NOTE: Not goroutine safe.
  105. func (sw *Switch) AddListener(l Listener) {
  106. sw.listeners = append(sw.listeners, l)
  107. }
  108. // Listeners returns the list of listeners the switch listens on.
  109. // NOTE: Not goroutine safe.
  110. func (sw *Switch) Listeners() []Listener {
  111. return sw.listeners
  112. }
  113. // IsListening returns true if the switch has at least one listener.
  114. // NOTE: Not goroutine safe.
  115. func (sw *Switch) IsListening() bool {
  116. return len(sw.listeners) > 0
  117. }
  118. // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
  119. // NOTE: Not goroutine safe.
  120. func (sw *Switch) SetNodeInfo(nodeInfo *NodeInfo) {
  121. sw.nodeInfo = nodeInfo
  122. }
  123. // NodeInfo returns the switch's NodeInfo.
  124. // NOTE: Not goroutine safe.
  125. func (sw *Switch) NodeInfo() *NodeInfo {
  126. return sw.nodeInfo
  127. }
  128. // SetNodeKey sets the switch's private key for authenticated encryption.
  129. // NOTE: Overwrites sw.nodeInfo.PubKey.
  130. // NOTE: Not goroutine safe.
  131. func (sw *Switch) SetNodeKey(nodeKey *NodeKey) {
  132. sw.nodeKey = nodeKey
  133. if sw.nodeInfo != nil {
  134. sw.nodeInfo.PubKey = nodeKey.PubKey()
  135. }
  136. }
  137. //---------------------------------------------------------------------
  138. // Service start/stop
  139. // OnStart implements BaseService. It starts all the reactors, peers, and listeners.
  140. func (sw *Switch) OnStart() error {
  141. // Start reactors
  142. for _, reactor := range sw.reactors {
  143. err := reactor.Start()
  144. if err != nil {
  145. return errors.Wrapf(err, "failed to start %v", reactor)
  146. }
  147. }
  148. // Start listeners
  149. for _, listener := range sw.listeners {
  150. go sw.listenerRoutine(listener)
  151. }
  152. return nil
  153. }
  154. // OnStop implements BaseService. It stops all listeners, peers, and reactors.
  155. func (sw *Switch) OnStop() {
  156. // Stop listeners
  157. for _, listener := range sw.listeners {
  158. listener.Stop()
  159. }
  160. sw.listeners = nil
  161. // Stop peers
  162. for _, peer := range sw.peers.List() {
  163. peer.Stop()
  164. sw.peers.Remove(peer)
  165. }
  166. // Stop reactors
  167. sw.Logger.Debug("Switch: Stopping reactors")
  168. for _, reactor := range sw.reactors {
  169. reactor.Stop()
  170. }
  171. }
  172. //---------------------------------------------------------------------
  173. // Peers
  174. // Peers returns the set of peers that are connected to the switch.
  175. func (sw *Switch) Peers() IPeerSet {
  176. return sw.peers
  177. }
  178. // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
  179. func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
  180. peers := sw.peers.List()
  181. for _, peer := range peers {
  182. if peer.IsOutbound() {
  183. outbound++
  184. } else {
  185. inbound++
  186. }
  187. }
  188. dialing = sw.dialing.Size()
  189. return
  190. }
  191. // Broadcast runs a go routine for each attempted send, which will block
  192. // trying to send for defaultSendTimeoutSeconds. Returns a channel
  193. // which receives success values for each attempted send (false if times out).
  194. // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
  195. // TODO: Something more intelligent.
  196. func (sw *Switch) Broadcast(chID byte, msg interface{}) chan bool {
  197. successChan := make(chan bool, len(sw.peers.List()))
  198. sw.Logger.Debug("Broadcast", "channel", chID, "msg", msg)
  199. for _, peer := range sw.peers.List() {
  200. go func(peer Peer) {
  201. success := peer.Send(chID, msg)
  202. successChan <- success
  203. }(peer)
  204. }
  205. return successChan
  206. }
  207. // StopPeerForError disconnects from a peer due to external error.
  208. // If the peer is persistent, it will attempt to reconnect.
  209. // TODO: make record depending on reason.
  210. func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
  211. sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
  212. sw.stopAndRemovePeer(peer, reason)
  213. if peer.IsPersistent() {
  214. go sw.reconnectToPeer(peer)
  215. }
  216. }
  217. // StopPeerGracefully disconnects from a peer gracefully.
  218. // TODO: handle graceful disconnects.
  219. func (sw *Switch) StopPeerGracefully(peer Peer) {
  220. sw.Logger.Info("Stopping peer gracefully")
  221. sw.stopAndRemovePeer(peer, nil)
  222. }
  223. func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
  224. sw.peers.Remove(peer)
  225. peer.Stop()
  226. for _, reactor := range sw.reactors {
  227. reactor.RemovePeer(peer, reason)
  228. }
  229. }
  230. // reconnectToPeer tries to reconnect to the peer, first repeatedly
  231. // with a fixed interval, then with exponential backoff.
  232. // If no success after all that, it stops trying, and leaves it
  233. // to the PEX/Addrbook to find the peer again
  234. func (sw *Switch) reconnectToPeer(peer Peer) {
  235. netAddr := peer.NodeInfo().NetAddress()
  236. start := time.Now()
  237. sw.Logger.Info("Reconnecting to peer", "peer", peer)
  238. for i := 0; i < reconnectAttempts; i++ {
  239. if !sw.IsRunning() {
  240. return
  241. }
  242. peer, err := sw.DialPeerWithAddress(netAddr, true)
  243. if err != nil {
  244. sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer)
  245. // sleep a set amount
  246. sw.randomSleep(reconnectInterval)
  247. continue
  248. } else {
  249. sw.Logger.Info("Reconnected to peer", "peer", peer)
  250. return
  251. }
  252. }
  253. sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
  254. "peer", peer, "elapsed", time.Since(start))
  255. for i := 0; i < reconnectBackOffAttempts; i++ {
  256. if !sw.IsRunning() {
  257. return
  258. }
  259. // sleep an exponentially increasing amount
  260. sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
  261. sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
  262. peer, err := sw.DialPeerWithAddress(netAddr, true)
  263. if err != nil {
  264. sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer)
  265. continue
  266. } else {
  267. sw.Logger.Info("Reconnected to peer", "peer", peer)
  268. return
  269. }
  270. }
  271. sw.Logger.Error("Failed to reconnect to peer. Giving up", "peer", peer, "elapsed", time.Since(start))
  272. }
  273. //---------------------------------------------------------------------
  274. // Dialing
  275. // IsDialing returns true if the switch is currently dialing the given ID.
  276. func (sw *Switch) IsDialing(id ID) bool {
  277. return sw.dialing.Has(string(id))
  278. }
  279. // DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent).
  280. func (sw *Switch) DialPeersAsync(addrBook *AddrBook, peers []string, persistent bool) error {
  281. netAddrs, errs := NewNetAddressStrings(peers)
  282. // TODO: IDs
  283. for _, err := range errs {
  284. sw.Logger.Error("Error in peer's address", "err", err)
  285. }
  286. if addrBook != nil {
  287. // add peers to `addrBook`
  288. ourAddrS := sw.nodeInfo.ListenAddr
  289. ourAddr, _ := NewNetAddressString(ourAddrS)
  290. for _, netAddr := range netAddrs {
  291. // do not add ourselves
  292. if netAddr.Equals(ourAddr) {
  293. continue
  294. }
  295. addrBook.AddAddress(netAddr, ourAddr)
  296. }
  297. addrBook.Save()
  298. }
  299. // permute the list, dial them in random order.
  300. perm := sw.rng.Perm(len(netAddrs))
  301. for i := 0; i < len(perm); i++ {
  302. go func(i int) {
  303. sw.randomSleep(0)
  304. j := perm[i]
  305. peer, err := sw.DialPeerWithAddress(netAddrs[j], persistent)
  306. if err != nil {
  307. sw.Logger.Error("Error dialing peer", "err", err)
  308. } else {
  309. sw.Logger.Info("Connected to peer", "peer", peer)
  310. }
  311. }(i)
  312. }
  313. return nil
  314. }
  315. // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects successfully.
  316. // If `persistent == true`, the switch will always try to reconnect to this peer if the connection ever fails.
  317. func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) (Peer, error) {
  318. sw.dialing.Set(string(addr.ID), addr)
  319. defer sw.dialing.Delete(string(addr.ID))
  320. sw.Logger.Info("Dialing peer", "address", addr)
  321. peer, err := newOutboundPeer(addr, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodeKey.PrivKey, sw.peerConfig)
  322. if err != nil {
  323. sw.Logger.Error("Failed to dial peer", "address", addr, "err", err)
  324. return nil, err
  325. }
  326. peer.SetLogger(sw.Logger.With("peer", addr))
  327. // authenticate peer
  328. if addr.ID == "" {
  329. peer.Logger.Info("Dialed peer with unknown ID - unable to authenticate", "addr", addr)
  330. } else if addr.ID != peer.ID() {
  331. return nil, fmt.Errorf("Failed to authenticate peer %v. Connected to peer with ID %s", addr, peer.ID())
  332. }
  333. if persistent {
  334. peer.makePersistent()
  335. }
  336. err = sw.addPeer(peer)
  337. if err != nil {
  338. sw.Logger.Error("Failed to add peer", "address", addr, "err", err)
  339. peer.CloseConn()
  340. return nil, err
  341. }
  342. sw.Logger.Info("Dialed and added peer", "address", addr, "peer", peer)
  343. return peer, nil
  344. }
  345. // sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
  346. func (sw *Switch) randomSleep(interval time.Duration) {
  347. r := time.Duration(sw.rng.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond
  348. time.Sleep(r + interval)
  349. }
  350. //------------------------------------------------------------------------------------
  351. // Connection filtering
  352. // FilterConnByAddr returns an error if connecting to the given address is forbidden.
  353. func (sw *Switch) FilterConnByAddr(addr net.Addr) error {
  354. if sw.filterConnByAddr != nil {
  355. return sw.filterConnByAddr(addr)
  356. }
  357. return nil
  358. }
  359. // FilterConnByPubKey returns an error if connecting to the given public key is forbidden.
  360. func (sw *Switch) FilterConnByPubKey(pubkey crypto.PubKey) error {
  361. if sw.filterConnByPubKey != nil {
  362. return sw.filterConnByPubKey(pubkey)
  363. }
  364. return nil
  365. }
  366. // SetAddrFilter sets the function for filtering connections by address.
  367. func (sw *Switch) SetAddrFilter(f func(net.Addr) error) {
  368. sw.filterConnByAddr = f
  369. }
  370. // SetPubKeyFilter sets the function for filtering connections by public key.
  371. func (sw *Switch) SetPubKeyFilter(f func(crypto.PubKey) error) {
  372. sw.filterConnByPubKey = f
  373. }
  374. //------------------------------------------------------------------------------------
  375. func (sw *Switch) listenerRoutine(l Listener) {
  376. for {
  377. inConn, ok := <-l.Connections()
  378. if !ok {
  379. break
  380. }
  381. // ignore connection if we already have enough
  382. maxPeers := sw.config.MaxNumPeers
  383. if maxPeers <= sw.peers.Size() {
  384. sw.Logger.Info("Ignoring inbound connection: already have enough peers", "address", inConn.RemoteAddr().String(), "numPeers", sw.peers.Size(), "max", maxPeers)
  385. continue
  386. }
  387. // New inbound connection!
  388. err := sw.addInboundPeerWithConfig(inConn, sw.peerConfig)
  389. if err != nil {
  390. sw.Logger.Info("Ignoring inbound connection: error while adding peer", "address", inConn.RemoteAddr().String(), "err", err)
  391. continue
  392. }
  393. // NOTE: We don't yet have the listening port of the
  394. // remote (if they have a listener at all).
  395. // The peerHandshake will handle that.
  396. }
  397. // cleanup
  398. }
  399. func (sw *Switch) addInboundPeerWithConfig(conn net.Conn, config *PeerConfig) error {
  400. peer, err := newInboundPeer(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodeKey.PrivKey, config)
  401. if err != nil {
  402. if err := conn.Close(); err != nil {
  403. sw.Logger.Error("Error closing connection", "err", err)
  404. }
  405. return err
  406. }
  407. peer.SetLogger(sw.Logger.With("peer", conn.RemoteAddr()))
  408. if err = sw.addPeer(peer); err != nil {
  409. peer.CloseConn()
  410. return err
  411. }
  412. return nil
  413. }
  414. // addPeer checks the given peer's validity, performs a handshake, and adds the
  415. // peer to the switch and to all registered reactors.
  416. // We already have an authenticated SecretConnection with the peer.
  417. // NOTE: This performs a blocking handshake before the peer is added.
  418. // NOTE: If error is returned, caller is responsible for calling peer.CloseConn()
  419. func (sw *Switch) addPeer(peer *peer) error {
  420. // Avoid self
  421. if sw.nodeKey.ID() == peer.ID() {
  422. return ErrSwitchConnectToSelf
  423. }
  424. // Filter peer against white list
  425. if err := sw.FilterConnByAddr(peer.Addr()); err != nil {
  426. return err
  427. }
  428. if err := sw.FilterConnByPubKey(peer.PubKey()); err != nil {
  429. return err
  430. }
  431. // Exchange NodeInfo with the peer
  432. if err := peer.HandshakeTimeout(sw.nodeInfo, time.Duration(sw.peerConfig.HandshakeTimeout*time.Second)); err != nil {
  433. return err
  434. }
  435. // Avoid duplicate
  436. if sw.peers.Has(peer.ID()) {
  437. return ErrSwitchDuplicatePeer
  438. }
  439. // Check version, chain id
  440. if err := sw.nodeInfo.CompatibleWith(peer.NodeInfo()); err != nil {
  441. return err
  442. }
  443. // Start peer
  444. if sw.IsRunning() {
  445. sw.startInitPeer(peer)
  446. }
  447. // Add the peer to .peers.
  448. // We start it first so that a peer in the list is safe to Stop.
  449. // It should not err since we already checked peers.Has().
  450. if err := sw.peers.Add(peer); err != nil {
  451. return err
  452. }
  453. sw.Logger.Info("Added peer", "peer", peer)
  454. return nil
  455. }
  456. func (sw *Switch) startInitPeer(peer *peer) {
  457. err := peer.Start() // spawn send/recv routines
  458. if err != nil {
  459. // Should never happen
  460. sw.Logger.Error("Error starting peer", "peer", peer, "err", err)
  461. }
  462. for _, reactor := range sw.reactors {
  463. reactor.AddPeer(peer)
  464. }
  465. }