You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

606 lines
17 KiB

9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
9 years ago
8 years ago
9 years ago
9 years ago
9 years ago
8 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
8 years ago
9 years ago
8 years ago
8 years ago
8 years ago
8 years ago
7 years ago
9 years ago
8 years ago
8 years ago
7 years ago
9 years ago
8 years ago
9 years ago
9 years ago
9 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
8 years ago
9 years ago
7 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
8 years ago
9 years ago
7 years ago
8 years ago
8 years ago
9 years ago
8 years ago
9 years ago
7 years ago
8 years ago
9 years ago
7 years ago
9 years ago
8 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
7 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
  1. package p2p
  2. import (
  3. "errors"
  4. "fmt"
  5. "math/rand"
  6. "net"
  7. "time"
  8. crypto "github.com/tendermint/go-crypto"
  9. cfg "github.com/tendermint/tendermint/config"
  10. cmn "github.com/tendermint/tmlibs/common"
  11. )
  // Settings consulted by the reconnect loop that StopPeerForError runs for
  // persistent peers.
  const (
  	// reconnectInterval is how long the loop sleeps after a failed dial
  	// before trying again.
  	reconnectInterval = 3 * time.Second
  	// reconnectAttempts bounds the attempt counter of the reconnect loop.
  	reconnectAttempts = 30
  )
  16. type Reactor interface {
  17. cmn.Service // Start, Stop
  18. SetSwitch(*Switch)
  19. GetChannels() []*ChannelDescriptor
  20. AddPeer(peer Peer)
  21. RemovePeer(peer Peer, reason interface{})
  22. Receive(chID byte, peer Peer, msgBytes []byte) // CONTRACT: msgBytes are not nil
  23. }
  24. //--------------------------------------
  25. type BaseReactor struct {
  26. cmn.BaseService // Provides Start, Stop, .Quit
  27. Switch *Switch
  28. }
  29. func NewBaseReactor(name string, impl Reactor) *BaseReactor {
  30. return &BaseReactor{
  31. BaseService: *cmn.NewBaseService(nil, name, impl),
  32. Switch: nil,
  33. }
  34. }
  35. func (br *BaseReactor) SetSwitch(sw *Switch) {
  36. br.Switch = sw
  37. }
  38. func (_ *BaseReactor) GetChannels() []*ChannelDescriptor { return nil }
  39. func (_ *BaseReactor) AddPeer(peer Peer) {}
  40. func (_ *BaseReactor) RemovePeer(peer Peer, reason interface{}) {}
  41. func (_ *BaseReactor) Receive(chID byte, peer Peer, msgBytes []byte) {}
  42. //-----------------------------------------------------------------------------
  43. /*
  44. The `Switch` handles peer connections and exposes an API to receive incoming messages
  45. on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one
  46. or more `Channels`. So while sending outgoing messages is typically performed on the peer,
  47. incoming messages are received on the reactor.
  48. */
  49. type Switch struct {
  50. cmn.BaseService
  51. config *cfg.P2PConfig
  52. peerConfig *PeerConfig
  53. listeners []Listener
  54. reactors map[string]Reactor
  55. chDescs []*ChannelDescriptor
  56. reactorsByCh map[byte]Reactor
  57. peers *PeerSet
  58. dialing *cmn.CMap
  59. nodeInfo *NodeInfo // our node info
  60. nodePrivKey crypto.PrivKeyEd25519 // our node privkey
  61. filterConnByAddr func(net.Addr) error
  62. filterConnByPubKey func(crypto.PubKeyEd25519) error
  63. }
  // ErrSwitchDuplicatePeer is returned by addPeer when the switch already holds
  // a peer with the same key.
  var ErrSwitchDuplicatePeer = errors.New("Duplicate peer")
  67. func NewSwitch(config *cfg.P2PConfig) *Switch {
  68. sw := &Switch{
  69. config: config,
  70. peerConfig: DefaultPeerConfig(),
  71. reactors: make(map[string]Reactor),
  72. chDescs: make([]*ChannelDescriptor, 0),
  73. reactorsByCh: make(map[byte]Reactor),
  74. peers: NewPeerSet(),
  75. dialing: cmn.NewCMap(),
  76. nodeInfo: nil,
  77. }
  78. // TODO: collapse the peerConfig into the config ?
  79. sw.peerConfig.MConfig.flushThrottle = time.Duration(config.FlushThrottleTimeout) * time.Millisecond
  80. sw.peerConfig.MConfig.SendRate = config.SendRate
  81. sw.peerConfig.MConfig.RecvRate = config.RecvRate
  82. sw.peerConfig.MConfig.maxMsgPacketPayloadSize = config.MaxMsgPacketPayloadSize
  83. sw.BaseService = *cmn.NewBaseService(nil, "P2P Switch", sw)
  84. return sw
  85. }
  86. // AddReactor adds the given reactor to the switch.
  87. // NOTE: Not goroutine safe.
  88. func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
  89. // Validate the reactor.
  90. // No two reactors can share the same channel.
  91. reactorChannels := reactor.GetChannels()
  92. for _, chDesc := range reactorChannels {
  93. chID := chDesc.ID
  94. if sw.reactorsByCh[chID] != nil {
  95. cmn.PanicSanity(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
  96. }
  97. sw.chDescs = append(sw.chDescs, chDesc)
  98. sw.reactorsByCh[chID] = reactor
  99. }
  100. sw.reactors[name] = reactor
  101. reactor.SetSwitch(sw)
  102. return reactor
  103. }
  104. // Reactors returns a map of reactors registered on the switch.
  105. // NOTE: Not goroutine safe.
  106. func (sw *Switch) Reactors() map[string]Reactor {
  107. return sw.reactors
  108. }
  109. // Reactor returns the reactor with the given name.
  110. // NOTE: Not goroutine safe.
  111. func (sw *Switch) Reactor(name string) Reactor {
  112. return sw.reactors[name]
  113. }
  114. // AddListener adds the given listener to the switch for listening to incoming peer connections.
  115. // NOTE: Not goroutine safe.
  116. func (sw *Switch) AddListener(l Listener) {
  117. sw.listeners = append(sw.listeners, l)
  118. }
  119. // Listeners returns the list of listeners the switch listens on.
  120. // NOTE: Not goroutine safe.
  121. func (sw *Switch) Listeners() []Listener {
  122. return sw.listeners
  123. }
  124. // IsListening returns true if the switch has at least one listener.
  125. // NOTE: Not goroutine safe.
  126. func (sw *Switch) IsListening() bool {
  127. return len(sw.listeners) > 0
  128. }
  129. // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
  130. // NOTE: Not goroutine safe.
  131. func (sw *Switch) SetNodeInfo(nodeInfo *NodeInfo) {
  132. sw.nodeInfo = nodeInfo
  133. }
  134. // NodeInfo returns the switch's NodeInfo.
  135. // NOTE: Not goroutine safe.
  136. func (sw *Switch) NodeInfo() *NodeInfo {
  137. return sw.nodeInfo
  138. }
  139. // SetNodePrivKey sets the switch's private key for authenticated encryption.
  140. // NOTE: Overwrites sw.nodeInfo.PubKey.
  141. // NOTE: Not goroutine safe.
  142. func (sw *Switch) SetNodePrivKey(nodePrivKey crypto.PrivKeyEd25519) {
  143. sw.nodePrivKey = nodePrivKey
  144. if sw.nodeInfo != nil {
  145. sw.nodeInfo.PubKey = nodePrivKey.PubKey().Unwrap().(crypto.PubKeyEd25519)
  146. }
  147. }
  148. // OnStart implements BaseService. It starts all the reactors, peers, and listeners.
  149. func (sw *Switch) OnStart() error {
  150. if err := sw.BaseService.OnStart(); err != nil {
  151. return err
  152. }
  153. // Start reactors
  154. for _, reactor := range sw.reactors {
  155. _, err := reactor.Start()
  156. if err != nil {
  157. return err
  158. }
  159. }
  160. // Start listeners
  161. for _, listener := range sw.listeners {
  162. go sw.listenerRoutine(listener)
  163. }
  164. return nil
  165. }
  166. // OnStop implements BaseService. It stops all listeners, peers, and reactors.
  167. func (sw *Switch) OnStop() {
  168. sw.BaseService.OnStop()
  169. // Stop listeners
  170. for _, listener := range sw.listeners {
  171. listener.Stop()
  172. }
  173. sw.listeners = nil
  174. // Stop peers
  175. for _, peer := range sw.peers.List() {
  176. peer.Stop()
  177. sw.peers.Remove(peer)
  178. }
  179. // Stop reactors
  180. for _, reactor := range sw.reactors {
  181. reactor.Stop()
  182. }
  183. }
  184. // addPeer checks the given peer's validity, performs a handshake, and adds the
  185. // peer to the switch and to all registered reactors.
  186. // NOTE: This performs a blocking handshake before the peer is added.
  187. // NOTE: If error is returned, caller is responsible for calling peer.CloseConn()
  188. func (sw *Switch) addPeer(peer *peer) error {
  189. if err := sw.FilterConnByAddr(peer.Addr()); err != nil {
  190. return err
  191. }
  192. if err := sw.FilterConnByPubKey(peer.PubKey()); err != nil {
  193. return err
  194. }
  195. if err := peer.HandshakeTimeout(sw.nodeInfo, time.Duration(sw.peerConfig.HandshakeTimeout*time.Second)); err != nil {
  196. return err
  197. }
  198. // Avoid self
  199. if sw.nodeInfo.PubKey.Equals(peer.PubKey().Wrap()) {
  200. return errors.New("Ignoring connection from self")
  201. }
  202. // Check version, chain id
  203. if err := sw.nodeInfo.CompatibleWith(peer.NodeInfo()); err != nil {
  204. return err
  205. }
  206. // Check for duplicate peer
  207. if sw.peers.Has(peer.Key()) {
  208. return ErrSwitchDuplicatePeer
  209. }
  210. // Start peer
  211. if sw.IsRunning() {
  212. sw.startInitPeer(peer)
  213. }
  214. // Add the peer to .peers.
  215. // We start it first so that a peer in the list is safe to Stop.
  216. // It should not err since we already checked peers.Has().
  217. if err := sw.peers.Add(peer); err != nil {
  218. return err
  219. }
  220. sw.Logger.Info("Added peer", "peer", peer)
  221. return nil
  222. }
  223. // FilterConnByAddr returns an error if connecting to the given address is forbidden.
  224. func (sw *Switch) FilterConnByAddr(addr net.Addr) error {
  225. if sw.filterConnByAddr != nil {
  226. return sw.filterConnByAddr(addr)
  227. }
  228. return nil
  229. }
  230. // FilterConnByPubKey returns an error if connecting to the given public key is forbidden.
  231. func (sw *Switch) FilterConnByPubKey(pubkey crypto.PubKeyEd25519) error {
  232. if sw.filterConnByPubKey != nil {
  233. return sw.filterConnByPubKey(pubkey)
  234. }
  235. return nil
  236. }
  237. // SetAddrFilter sets the function for filtering connections by address.
  238. func (sw *Switch) SetAddrFilter(f func(net.Addr) error) {
  239. sw.filterConnByAddr = f
  240. }
  241. // SetPubKeyFilter sets the function for filtering connections by public key.
  242. func (sw *Switch) SetPubKeyFilter(f func(crypto.PubKeyEd25519) error) {
  243. sw.filterConnByPubKey = f
  244. }
  245. func (sw *Switch) startInitPeer(peer *peer) {
  246. _, err := peer.Start() // spawn send/recv routines
  247. if err != nil {
  248. sw.Logger.Error("Error starting peer", "err", err)
  249. }
  250. for _, reactor := range sw.reactors {
  251. reactor.AddPeer(peer)
  252. }
  253. }
  254. // DialSeeds dials a list of seeds asynchronously in random order.
  255. func (sw *Switch) DialSeeds(addrBook *AddrBook, seeds []string) error {
  256. netAddrs, err := NewNetAddressStrings(seeds)
  257. if err != nil {
  258. return err
  259. }
  260. if addrBook != nil {
  261. // add seeds to `addrBook`
  262. ourAddrS := sw.nodeInfo.ListenAddr
  263. ourAddr, _ := NewNetAddressString(ourAddrS)
  264. for _, netAddr := range netAddrs {
  265. // do not add ourselves
  266. if netAddr.Equals(ourAddr) {
  267. continue
  268. }
  269. addrBook.AddAddress(netAddr, ourAddr)
  270. }
  271. addrBook.Save()
  272. }
  273. // Ensure we have a completely undeterministic PRNG. cmd.RandInt64() draws
  274. // from a seed that's initialized with OS entropy on process start.
  275. rng := rand.New(rand.NewSource(cmn.RandInt64()))
  276. // permute the list, dial them in random order.
  277. perm := rng.Perm(len(netAddrs))
  278. for i := 0; i < len(perm); i++ {
  279. go func(i int) {
  280. time.Sleep(time.Duration(rng.Int63n(3000)) * time.Millisecond)
  281. j := perm[i]
  282. sw.dialSeed(netAddrs[j])
  283. }(i)
  284. }
  285. return nil
  286. }
  287. func (sw *Switch) dialSeed(addr *NetAddress) {
  288. peer, err := sw.DialPeerWithAddress(addr, true)
  289. if err != nil {
  290. sw.Logger.Error("Error dialing seed", "err", err)
  291. } else {
  292. sw.Logger.Info("Connected to seed", "peer", peer)
  293. }
  294. }
  295. // DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects successfully.
  296. // If `persistent == true`, the switch will always try to reconnect to this peer if the connection ever fails.
  297. func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) (Peer, error) {
  298. sw.dialing.Set(addr.IP.String(), addr)
  299. defer sw.dialing.Delete(addr.IP.String())
  300. sw.Logger.Info("Dialing peer", "address", addr)
  301. peer, err := newOutboundPeer(addr, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, sw.peerConfig)
  302. if err != nil {
  303. sw.Logger.Error("Failed to dial peer", "address", addr, "err", err)
  304. return nil, err
  305. }
  306. peer.SetLogger(sw.Logger.With("peer", addr))
  307. if persistent {
  308. peer.makePersistent()
  309. }
  310. err = sw.addPeer(peer)
  311. if err != nil {
  312. sw.Logger.Error("Failed to add peer", "address", addr, "err", err)
  313. peer.CloseConn()
  314. return nil, err
  315. }
  316. sw.Logger.Info("Dialed and added peer", "address", addr, "peer", peer)
  317. return peer, nil
  318. }
  319. // IsDialing returns true if the switch is currently dialing the given address.
  320. func (sw *Switch) IsDialing(addr *NetAddress) bool {
  321. return sw.dialing.Has(addr.IP.String())
  322. }
  323. // Broadcast runs a go routine for each attempted send, which will block
  324. // trying to send for defaultSendTimeoutSeconds. Returns a channel
  325. // which receives success values for each attempted send (false if times out).
  326. // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
  327. // TODO: Something more intelligent.
  328. func (sw *Switch) Broadcast(chID byte, msg interface{}) chan bool {
  329. successChan := make(chan bool, len(sw.peers.List()))
  330. sw.Logger.Debug("Broadcast", "channel", chID, "msg", msg)
  331. for _, peer := range sw.peers.List() {
  332. go func(peer Peer) {
  333. success := peer.Send(chID, msg)
  334. successChan <- success
  335. }(peer)
  336. }
  337. return successChan
  338. }
  339. // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
  340. func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
  341. peers := sw.peers.List()
  342. for _, peer := range peers {
  343. if peer.IsOutbound() {
  344. outbound++
  345. } else {
  346. inbound++
  347. }
  348. }
  349. dialing = sw.dialing.Size()
  350. return
  351. }
  352. // Peers returns the set of peers that are connected to the switch.
  353. func (sw *Switch) Peers() IPeerSet {
  354. return sw.peers
  355. }
  356. // StopPeerForError disconnects from a peer due to external error.
  357. // If the peer is persistent, it will attempt to reconnect.
  358. // TODO: make record depending on reason.
  359. func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
  360. addr, _ := NewNetAddressString(peer.NodeInfo().RemoteAddr)
  361. sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
  362. sw.stopAndRemovePeer(peer, reason)
  363. if peer.IsPersistent() {
  364. go func() {
  365. sw.Logger.Info("Reconnecting to peer", "peer", peer)
  366. for i := 1; i < reconnectAttempts; i++ {
  367. if !sw.IsRunning() {
  368. return
  369. }
  370. peer, err := sw.DialPeerWithAddress(addr, true)
  371. if err != nil {
  372. if i == reconnectAttempts {
  373. sw.Logger.Info("Error reconnecting to peer. Giving up", "tries", i, "err", err)
  374. return
  375. }
  376. sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err)
  377. time.Sleep(reconnectInterval)
  378. continue
  379. }
  380. sw.Logger.Info("Reconnected to peer", "peer", peer)
  381. return
  382. }
  383. }()
  384. }
  385. }
  386. // StopPeerGracefully disconnects from a peer gracefully.
  387. // TODO: handle graceful disconnects.
  388. func (sw *Switch) StopPeerGracefully(peer Peer) {
  389. sw.Logger.Info("Stopping peer gracefully")
  390. sw.stopAndRemovePeer(peer, nil)
  391. }
  392. func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
  393. sw.peers.Remove(peer)
  394. peer.Stop()
  395. for _, reactor := range sw.reactors {
  396. reactor.RemovePeer(peer, reason)
  397. }
  398. }
  399. func (sw *Switch) listenerRoutine(l Listener) {
  400. for {
  401. inConn, ok := <-l.Connections()
  402. if !ok {
  403. break
  404. }
  405. // ignore connection if we already have enough
  406. maxPeers := sw.config.MaxNumPeers
  407. if maxPeers <= sw.peers.Size() {
  408. sw.Logger.Info("Ignoring inbound connection: already have enough peers", "address", inConn.RemoteAddr().String(), "numPeers", sw.peers.Size(), "max", maxPeers)
  409. continue
  410. }
  411. // New inbound connection!
  412. err := sw.addPeerWithConnectionAndConfig(inConn, sw.peerConfig)
  413. if err != nil {
  414. sw.Logger.Info("Ignoring inbound connection: error while adding peer", "address", inConn.RemoteAddr().String(), "err", err)
  415. continue
  416. }
  417. // NOTE: We don't yet have the listening port of the
  418. // remote (if they have a listener at all).
  419. // The peerHandshake will handle that.
  420. }
  421. // cleanup
  422. }
  423. //------------------------------------------------------------------
  424. // Connects switches via arbitrary net.Conn. Used for testing.
  425. // MakeConnectedSwitches returns n switches, connected according to the connect func.
  426. // If connect==Connect2Switches, the switches will be fully connected.
  427. // initSwitch defines how the i'th switch should be initialized (ie. with what reactors).
  428. // NOTE: panics if any switch fails to start.
  429. func MakeConnectedSwitches(cfg *cfg.P2PConfig, n int, initSwitch func(int, *Switch) *Switch, connect func([]*Switch, int, int)) []*Switch {
  430. switches := make([]*Switch, n)
  431. for i := 0; i < n; i++ {
  432. switches[i] = makeSwitch(cfg, i, "testing", "123.123.123", initSwitch)
  433. }
  434. if err := StartSwitches(switches); err != nil {
  435. panic(err)
  436. }
  437. for i := 0; i < n; i++ {
  438. for j := i + 1; j < n; j++ {
  439. connect(switches, i, j)
  440. }
  441. }
  442. return switches
  443. }
  444. // Connect2Switches will connect switches i and j via net.Pipe().
  445. // Blocks until a connection is established.
  446. // NOTE: caller ensures i and j are within bounds.
  447. func Connect2Switches(switches []*Switch, i, j int) {
  448. switchI := switches[i]
  449. switchJ := switches[j]
  450. c1, c2 := netPipe()
  451. doneCh := make(chan struct{})
  452. go func() {
  453. err := switchI.addPeerWithConnection(c1)
  454. if err != nil {
  455. panic(err)
  456. }
  457. doneCh <- struct{}{}
  458. }()
  459. go func() {
  460. err := switchJ.addPeerWithConnection(c2)
  461. if err != nil {
  462. panic(err)
  463. }
  464. doneCh <- struct{}{}
  465. }()
  466. <-doneCh
  467. <-doneCh
  468. }
  469. // StartSwitches calls sw.Start() for each given switch.
  470. // It returns the first encountered error.
  471. func StartSwitches(switches []*Switch) error {
  472. for _, s := range switches {
  473. _, err := s.Start() // start switch and reactors
  474. if err != nil {
  475. return err
  476. }
  477. }
  478. return nil
  479. }
  480. func makeSwitch(cfg *cfg.P2PConfig, i int, network, version string, initSwitch func(int, *Switch) *Switch) *Switch {
  481. privKey := crypto.GenPrivKeyEd25519()
  482. // new switch, add reactors
  483. // TODO: let the config be passed in?
  484. s := initSwitch(i, NewSwitch(cfg))
  485. s.SetNodeInfo(&NodeInfo{
  486. PubKey: privKey.PubKey().Unwrap().(crypto.PubKeyEd25519),
  487. Moniker: cmn.Fmt("switch%d", i),
  488. Network: network,
  489. Version: version,
  490. RemoteAddr: cmn.Fmt("%v:%v", network, rand.Intn(64512)+1023),
  491. ListenAddr: cmn.Fmt("%v:%v", network, rand.Intn(64512)+1023),
  492. })
  493. s.SetNodePrivKey(privKey)
  494. return s
  495. }
  496. func (sw *Switch) addPeerWithConnection(conn net.Conn) error {
  497. peer, err := newInboundPeer(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, sw.peerConfig)
  498. if err != nil {
  499. if err := conn.Close(); err != nil {
  500. sw.Logger.Error("Error closing connection", "err", err)
  501. }
  502. return err
  503. }
  504. peer.SetLogger(sw.Logger.With("peer", conn.RemoteAddr()))
  505. if err = sw.addPeer(peer); err != nil {
  506. peer.CloseConn()
  507. return err
  508. }
  509. return nil
  510. }
  511. func (sw *Switch) addPeerWithConnectionAndConfig(conn net.Conn, config *PeerConfig) error {
  512. peer, err := newInboundPeer(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, config)
  513. if err != nil {
  514. if err := conn.Close(); err != nil {
  515. sw.Logger.Error("Error closing connection", "err", err)
  516. }
  517. return err
  518. }
  519. peer.SetLogger(sw.Logger.With("peer", conn.RemoteAddr()))
  520. if err = sw.addPeer(peer); err != nil {
  521. peer.CloseConn()
  522. return err
  523. }
  524. return nil
  525. }