You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

599 lines
16 KiB

9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
7 years ago
7 years ago
7 years ago
9 years ago
9 years ago
7 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
7 years ago
9 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
  1. package p2p
  2. import (
  3. "errors"
  4. "fmt"
  5. "math/rand"
  6. "net"
  7. "time"
  8. crypto "github.com/tendermint/go-crypto"
  9. cfg "github.com/tendermint/tendermint/config"
  10. cmn "github.com/tendermint/tmlibs/common"
  11. )
  // Reconnection policy used by StopPeerForError when a persistent peer is
  // stopped because of an error.
  const (
  	// reconnectAttempts is the bound of the redial loop.
  	reconnectAttempts = 30

  	// reconnectInterval is how long the redial loop sleeps after a failed
  	// attempt before trying again.
  	reconnectInterval = 3 * time.Second
  )
  16. type Reactor interface {
  17. cmn.Service // Start, Stop
  18. SetSwitch(*Switch)
  19. GetChannels() []*ChannelDescriptor
  20. AddPeer(peer *Peer)
  21. RemovePeer(peer *Peer, reason interface{})
  22. Receive(chID byte, peer *Peer, msgBytes []byte)
  23. }
  24. //--------------------------------------
  25. type BaseReactor struct {
  26. cmn.BaseService // Provides Start, Stop, .Quit
  27. Switch *Switch
  28. }
  29. func NewBaseReactor(name string, impl Reactor) *BaseReactor {
  30. return &BaseReactor{
  31. BaseService: *cmn.NewBaseService(nil, name, impl),
  32. Switch: nil,
  33. }
  34. }
  35. func (br *BaseReactor) SetSwitch(sw *Switch) {
  36. br.Switch = sw
  37. }
  38. func (_ *BaseReactor) GetChannels() []*ChannelDescriptor { return nil }
  39. func (_ *BaseReactor) AddPeer(peer *Peer) {}
  40. func (_ *BaseReactor) RemovePeer(peer *Peer, reason interface{}) {}
  41. func (_ *BaseReactor) Receive(chID byte, peer *Peer, msgBytes []byte) {}
  42. //-----------------------------------------------------------------------------
  43. /*
  44. The `Switch` handles peer connections and exposes an API to receive incoming messages
  45. on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one
  46. or more `Channels`. So while sending outgoing messages is typically performed on the peer,
  47. incoming messages are received on the reactor.
  48. */
  49. type Switch struct {
  50. cmn.BaseService
  51. config *cfg.P2PConfig
  52. peerConfig *PeerConfig
  53. listeners []Listener
  54. reactors map[string]Reactor
  55. chDescs []*ChannelDescriptor
  56. reactorsByCh map[byte]Reactor
  57. peers *PeerSet
  58. dialing *cmn.CMap
  59. nodeInfo *NodeInfo // our node info
  60. nodePrivKey crypto.PrivKeyEd25519 // our node privkey
  61. filterConnByAddr func(net.Addr) error
  62. filterConnByPubKey func(crypto.PubKeyEd25519) error
  63. }
  // ErrSwitchDuplicatePeer is returned by Switch.AddPeer when a peer with the
  // same key is already in the switch's peer set.
  var ErrSwitchDuplicatePeer = errors.New("Duplicate peer")
  67. func NewSwitch(config *cfg.P2PConfig) *Switch {
  68. sw := &Switch{
  69. config: config,
  70. peerConfig: DefaultPeerConfig(),
  71. reactors: make(map[string]Reactor),
  72. chDescs: make([]*ChannelDescriptor, 0),
  73. reactorsByCh: make(map[byte]Reactor),
  74. peers: NewPeerSet(),
  75. dialing: cmn.NewCMap(),
  76. nodeInfo: nil,
  77. }
  78. sw.peerConfig.MConfig.flushThrottle = time.Duration(config.FlushThrottleTimeout) * time.Millisecond // TODO: collapse the peerConfig into the config ?
  79. sw.BaseService = *cmn.NewBaseService(nil, "P2P Switch", sw)
  80. return sw
  81. }
  82. // AddReactor adds the given reactor to the switch.
  83. // NOTE: Not goroutine safe.
  84. func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
  85. // Validate the reactor.
  86. // No two reactors can share the same channel.
  87. reactorChannels := reactor.GetChannels()
  88. for _, chDesc := range reactorChannels {
  89. chID := chDesc.ID
  90. if sw.reactorsByCh[chID] != nil {
  91. cmn.PanicSanity(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
  92. }
  93. sw.chDescs = append(sw.chDescs, chDesc)
  94. sw.reactorsByCh[chID] = reactor
  95. }
  96. sw.reactors[name] = reactor
  97. reactor.SetSwitch(sw)
  98. return reactor
  99. }
  100. // Reactors returns a map of reactors registered on the switch.
  101. // NOTE: Not goroutine safe.
  102. func (sw *Switch) Reactors() map[string]Reactor {
  103. return sw.reactors
  104. }
  105. // Reactor returns the reactor with the given name.
  106. // NOTE: Not goroutine safe.
  107. func (sw *Switch) Reactor(name string) Reactor {
  108. return sw.reactors[name]
  109. }
  110. // AddListener adds the given listener to the switch for listening to incoming peer connections.
  111. // NOTE: Not goroutine safe.
  112. func (sw *Switch) AddListener(l Listener) {
  113. sw.listeners = append(sw.listeners, l)
  114. }
  115. // Listeners returns the list of listeners the switch listens on.
  116. // NOTE: Not goroutine safe.
  117. func (sw *Switch) Listeners() []Listener {
  118. return sw.listeners
  119. }
  120. // IsListening returns true if the switch has at least one listener.
  121. // NOTE: Not goroutine safe.
  122. func (sw *Switch) IsListening() bool {
  123. return len(sw.listeners) > 0
  124. }
  125. // SetNodeInfo sets the switch's NodeInfo for checking compatibility and handshaking with other nodes.
  126. // NOTE: Not goroutine safe.
  127. func (sw *Switch) SetNodeInfo(nodeInfo *NodeInfo) {
  128. sw.nodeInfo = nodeInfo
  129. }
  130. // NodeInfo returns the switch's NodeInfo.
  131. // NOTE: Not goroutine safe.
  132. func (sw *Switch) NodeInfo() *NodeInfo {
  133. return sw.nodeInfo
  134. }
  135. // SetNodePrivKey sets the switche's private key for authenticated encryption.
  136. // NOTE: Overwrites sw.nodeInfo.PubKey.
  137. // NOTE: Not goroutine safe.
  138. func (sw *Switch) SetNodePrivKey(nodePrivKey crypto.PrivKeyEd25519) {
  139. sw.nodePrivKey = nodePrivKey
  140. if sw.nodeInfo != nil {
  141. sw.nodeInfo.PubKey = nodePrivKey.PubKey().Unwrap().(crypto.PubKeyEd25519)
  142. }
  143. }
  144. // OnStart implements BaseService. It starts all the reactors, peers, and listeners.
  145. func (sw *Switch) OnStart() error {
  146. sw.BaseService.OnStart()
  147. // Start reactors
  148. for _, reactor := range sw.reactors {
  149. _, err := reactor.Start()
  150. if err != nil {
  151. return err
  152. }
  153. }
  154. // Start listeners
  155. for _, listener := range sw.listeners {
  156. go sw.listenerRoutine(listener)
  157. }
  158. return nil
  159. }
  160. // OnStop implements BaseService. It stops all listeners, peers, and reactors.
  161. func (sw *Switch) OnStop() {
  162. sw.BaseService.OnStop()
  163. // Stop listeners
  164. for _, listener := range sw.listeners {
  165. listener.Stop()
  166. }
  167. sw.listeners = nil
  168. // Stop peers
  169. for _, peer := range sw.peers.List() {
  170. peer.Stop()
  171. sw.peers.Remove(peer)
  172. }
  173. // Stop reactors
  174. for _, reactor := range sw.reactors {
  175. reactor.Stop()
  176. }
  177. }
  178. // AddPeer checks the given peer's validity, performs a handshake, and adds the peer to the switch
  179. // and to all registered reactors.
  180. // NOTE: This performs a blocking handshake before the peer is added.
  181. // CONTRACT: If error is returned, peer is nil, and conn is immediately closed.
  182. func (sw *Switch) AddPeer(peer *Peer) error {
  183. if err := sw.FilterConnByAddr(peer.Addr()); err != nil {
  184. return err
  185. }
  186. if err := sw.FilterConnByPubKey(peer.PubKey()); err != nil {
  187. return err
  188. }
  189. if err := peer.HandshakeTimeout(sw.nodeInfo, time.Duration(sw.peerConfig.HandshakeTimeout*time.Second)); err != nil {
  190. return err
  191. }
  192. // Avoid self
  193. if sw.nodeInfo.PubKey.Equals(peer.PubKey().Wrap()) {
  194. return errors.New("Ignoring connection from self")
  195. }
  196. // Check version, chain id
  197. if err := sw.nodeInfo.CompatibleWith(peer.NodeInfo); err != nil {
  198. return err
  199. }
  200. // Check for duplicate peer
  201. if sw.peers.Has(peer.Key) {
  202. return ErrSwitchDuplicatePeer
  203. }
  204. // Start peer
  205. if sw.IsRunning() {
  206. sw.startInitPeer(peer)
  207. }
  208. // Add the peer to .peers.
  209. // We start it first so that a peer in the list is safe to Stop.
  210. // It should not err since we already checked peers.Has()
  211. if err := sw.peers.Add(peer); err != nil {
  212. return err
  213. }
  214. sw.Logger.Info("Added peer", "peer", peer)
  215. return nil
  216. }
  217. // FilterConnByAddr returns an error if connecting to the given address is forbidden.
  218. func (sw *Switch) FilterConnByAddr(addr net.Addr) error {
  219. if sw.filterConnByAddr != nil {
  220. return sw.filterConnByAddr(addr)
  221. }
  222. return nil
  223. }
  224. // FilterConnByPubKey returns an error if connecting to the given public key is forbidden.
  225. func (sw *Switch) FilterConnByPubKey(pubkey crypto.PubKeyEd25519) error {
  226. if sw.filterConnByPubKey != nil {
  227. return sw.filterConnByPubKey(pubkey)
  228. }
  229. return nil
  230. }
  231. // SetAddrFilter sets the function for filtering connections by address.
  232. func (sw *Switch) SetAddrFilter(f func(net.Addr) error) {
  233. sw.filterConnByAddr = f
  234. }
  235. // SetPubKeyFilter sets the function for filtering connections by public key.
  236. func (sw *Switch) SetPubKeyFilter(f func(crypto.PubKeyEd25519) error) {
  237. sw.filterConnByPubKey = f
  238. }
  239. func (sw *Switch) startInitPeer(peer *Peer) {
  240. peer.Start() // spawn send/recv routines
  241. for _, reactor := range sw.reactors {
  242. reactor.AddPeer(peer)
  243. }
  244. }
  245. // DialSeeds dials a list of seeds asynchronously in random order
  246. func (sw *Switch) DialSeeds(addrBook *AddrBook, seeds []string) error {
  247. netAddrs, err := NewNetAddressStrings(seeds)
  248. if err != nil {
  249. return err
  250. }
  251. if addrBook != nil {
  252. // add seeds to `addrBook`
  253. ourAddrS := sw.nodeInfo.ListenAddr
  254. ourAddr, _ := NewNetAddressString(ourAddrS)
  255. for _, netAddr := range netAddrs {
  256. // do not add ourselves
  257. if netAddr.Equals(ourAddr) {
  258. continue
  259. }
  260. addrBook.AddAddress(netAddr, ourAddr)
  261. }
  262. addrBook.Save()
  263. }
  264. // permute the list, dial them in random order.
  265. perm := rand.Perm(len(netAddrs))
  266. for i := 0; i < len(perm); i++ {
  267. go func(i int) {
  268. time.Sleep(time.Duration(rand.Int63n(3000)) * time.Millisecond)
  269. j := perm[i]
  270. sw.dialSeed(netAddrs[j])
  271. }(i)
  272. }
  273. return nil
  274. }
  275. func (sw *Switch) dialSeed(addr *NetAddress) {
  276. peer, err := sw.DialPeerWithAddress(addr, true)
  277. if err != nil {
  278. sw.Logger.Error("Error dialing seed", "err", err)
  279. } else {
  280. sw.Logger.Info("Connected to seed", "peer", peer)
  281. }
  282. }
  283. // DialPeerWithAddress dials the given peer and runs sw.AddPeer if it connects successfully.
  284. // If `persistent == true`, the switch will always try to reconnect to this peer if the connection ever fails.
  285. func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) (*Peer, error) {
  286. sw.dialing.Set(addr.IP.String(), addr)
  287. defer sw.dialing.Delete(addr.IP.String())
  288. sw.Logger.Info("Dialing peer", "address", addr)
  289. peer, err := newOutboundPeer(addr, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, sw.peerConfig)
  290. if err != nil {
  291. sw.Logger.Error("Failed to dial peer", "address", addr, "err", err)
  292. return nil, err
  293. }
  294. peer.SetLogger(sw.Logger.With("peer", addr))
  295. if persistent {
  296. peer.makePersistent()
  297. }
  298. err = sw.AddPeer(peer)
  299. if err != nil {
  300. sw.Logger.Error("Failed to add peer", "address", addr, "err", err)
  301. peer.CloseConn()
  302. return nil, err
  303. }
  304. sw.Logger.Info("Dialed and added peer", "address", addr, "peer", peer)
  305. return peer, nil
  306. }
  307. // IsDialing returns true if the switch is currently dialing the given address.
  308. func (sw *Switch) IsDialing(addr *NetAddress) bool {
  309. return sw.dialing.Has(addr.IP.String())
  310. }
  311. // Broadcast runs a go routine for each attempted send, which will block
  312. // trying to send for defaultSendTimeoutSeconds. Returns a channel
  313. // which receives success values for each attempted send (false if times out)
  314. // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
  315. // TODO: Something more intelligent.
  316. func (sw *Switch) Broadcast(chID byte, msg interface{}) chan bool {
  317. successChan := make(chan bool, len(sw.peers.List()))
  318. sw.Logger.Debug("Broadcast", "channel", chID, "msg", msg)
  319. for _, peer := range sw.peers.List() {
  320. go func(peer *Peer) {
  321. success := peer.Send(chID, msg)
  322. successChan <- success
  323. }(peer)
  324. }
  325. return successChan
  326. }
  327. // NumPeers returns the count of outbound/inbound and outbound-dialing peers.
  328. func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
  329. peers := sw.peers.List()
  330. for _, peer := range peers {
  331. if peer.outbound {
  332. outbound++
  333. } else {
  334. inbound++
  335. }
  336. }
  337. dialing = sw.dialing.Size()
  338. return
  339. }
  340. // Peers returns the set of peers the switch is connected to.
  341. func (sw *Switch) Peers() IPeerSet {
  342. return sw.peers
  343. }
  344. // StopPeerForError disconnects from a peer due to external error.
  345. // If the peer is persistent, it will attempt to reconnect.
  346. // TODO: make record depending on reason.
  347. func (sw *Switch) StopPeerForError(peer *Peer, reason interface{}) {
  348. addr := NewNetAddress(peer.Addr())
  349. sw.Logger.Error("Stopping peer for error", "peer", peer, "err", reason)
  350. sw.stopAndRemovePeer(peer, reason)
  351. if peer.IsPersistent() {
  352. go func() {
  353. sw.Logger.Info("Reconnecting to peer", "peer", peer)
  354. for i := 1; i < reconnectAttempts; i++ {
  355. if !sw.IsRunning() {
  356. return
  357. }
  358. peer, err := sw.DialPeerWithAddress(addr, true)
  359. if err != nil {
  360. if i == reconnectAttempts {
  361. sw.Logger.Info("Error reconnecting to peer. Giving up", "tries", i, "err", err)
  362. return
  363. }
  364. sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err)
  365. time.Sleep(reconnectInterval)
  366. continue
  367. }
  368. sw.Logger.Info("Reconnected to peer", "peer", peer)
  369. return
  370. }
  371. }()
  372. }
  373. }
  374. // StopPeerGracefully disconnects from a peer gracefully.
  375. // TODO: handle graceful disconnects.
  376. func (sw *Switch) StopPeerGracefully(peer *Peer) {
  377. sw.Logger.Info("Stopping peer gracefully")
  378. sw.stopAndRemovePeer(peer, nil)
  379. }
  380. func (sw *Switch) stopAndRemovePeer(peer *Peer, reason interface{}) {
  381. sw.peers.Remove(peer)
  382. peer.Stop()
  383. for _, reactor := range sw.reactors {
  384. reactor.RemovePeer(peer, reason)
  385. }
  386. }
  387. func (sw *Switch) listenerRoutine(l Listener) {
  388. for {
  389. inConn, ok := <-l.Connections()
  390. if !ok {
  391. break
  392. }
  393. // ignore connection if we already have enough
  394. maxPeers := sw.config.MaxNumPeers
  395. if maxPeers <= sw.peers.Size() {
  396. sw.Logger.Info("Ignoring inbound connection: already have enough peers", "address", inConn.RemoteAddr().String(), "numPeers", sw.peers.Size(), "max", maxPeers)
  397. continue
  398. }
  399. // New inbound connection!
  400. err := sw.addPeerWithConnectionAndConfig(inConn, sw.peerConfig)
  401. if err != nil {
  402. sw.Logger.Info("Ignoring inbound connection: error while adding peer", "address", inConn.RemoteAddr().String(), "err", err)
  403. continue
  404. }
  405. // NOTE: We don't yet have the listening port of the
  406. // remote (if they have a listener at all).
  407. // The peerHandshake will handle that
  408. }
  409. // cleanup
  410. }
  411. //-----------------------------------------------------------------------------
  412. type SwitchEventNewPeer struct {
  413. Peer *Peer
  414. }
  415. type SwitchEventDonePeer struct {
  416. Peer *Peer
  417. Error interface{}
  418. }
  419. //------------------------------------------------------------------
  420. // Switches connected via arbitrary net.Conn; useful for testing
  421. // MakeConnectedSwitches returns n switches, connected according to the connect func.
  422. // If connect==Connect2Switches, the switches will be fully connected.
  423. // initSwitch defines how the ith switch should be initialized (ie. with what reactors).
  424. // NOTE: panics if any switch fails to start.
  425. func MakeConnectedSwitches(cfg *cfg.P2PConfig, n int, initSwitch func(int, *Switch) *Switch, connect func([]*Switch, int, int)) []*Switch {
  426. switches := make([]*Switch, n)
  427. for i := 0; i < n; i++ {
  428. switches[i] = makeSwitch(cfg, i, "testing", "123.123.123", initSwitch)
  429. }
  430. if err := StartSwitches(switches); err != nil {
  431. panic(err)
  432. }
  433. for i := 0; i < n; i++ {
  434. for j := i; j < n; j++ {
  435. connect(switches, i, j)
  436. }
  437. }
  438. return switches
  439. }
  // PanicOnAddPeerErr makes Connect2Switches panic when adding a peer fails.
  // It is off by default, in which case such errors are ignored.
  var PanicOnAddPeerErr bool
  441. // Connect2Switches will connect switches i and j via net.Pipe()
  442. // Blocks until a conection is established.
  443. // NOTE: caller ensures i and j are within bounds
  444. func Connect2Switches(switches []*Switch, i, j int) {
  445. switchI := switches[i]
  446. switchJ := switches[j]
  447. c1, c2 := net.Pipe()
  448. doneCh := make(chan struct{})
  449. go func() {
  450. err := switchI.addPeerWithConnection(c1)
  451. if PanicOnAddPeerErr && err != nil {
  452. panic(err)
  453. }
  454. doneCh <- struct{}{}
  455. }()
  456. go func() {
  457. err := switchJ.addPeerWithConnection(c2)
  458. if PanicOnAddPeerErr && err != nil {
  459. panic(err)
  460. }
  461. doneCh <- struct{}{}
  462. }()
  463. <-doneCh
  464. <-doneCh
  465. }
  466. // StartSwitches calls sw.Start() for each given switch.
  467. // It returns the first encountered error.
  468. func StartSwitches(switches []*Switch) error {
  469. for _, s := range switches {
  470. _, err := s.Start() // start switch and reactors
  471. if err != nil {
  472. return err
  473. }
  474. }
  475. return nil
  476. }
  477. func makeSwitch(cfg *cfg.P2PConfig, i int, network, version string, initSwitch func(int, *Switch) *Switch) *Switch {
  478. privKey := crypto.GenPrivKeyEd25519()
  479. // new switch, add reactors
  480. // TODO: let the config be passed in?
  481. s := initSwitch(i, NewSwitch(cfg))
  482. s.SetNodeInfo(&NodeInfo{
  483. PubKey: privKey.PubKey().Unwrap().(crypto.PubKeyEd25519),
  484. Moniker: cmn.Fmt("switch%d", i),
  485. Network: network,
  486. Version: version,
  487. RemoteAddr: cmn.Fmt("%v:%v", network, rand.Intn(64512)+1023),
  488. ListenAddr: cmn.Fmt("%v:%v", network, rand.Intn(64512)+1023),
  489. })
  490. s.SetNodePrivKey(privKey)
  491. return s
  492. }
  493. func (sw *Switch) addPeerWithConnection(conn net.Conn) error {
  494. peer, err := newInboundPeer(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, sw.peerConfig)
  495. if err != nil {
  496. conn.Close()
  497. return err
  498. }
  499. peer.SetLogger(sw.Logger.With("peer", conn.RemoteAddr()))
  500. if err = sw.AddPeer(peer); err != nil {
  501. conn.Close()
  502. return err
  503. }
  504. return nil
  505. }
  506. func (sw *Switch) addPeerWithConnectionAndConfig(conn net.Conn, config *PeerConfig) error {
  507. peer, err := newInboundPeer(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, config)
  508. if err != nil {
  509. conn.Close()
  510. return err
  511. }
  512. peer.SetLogger(sw.Logger.With("peer", conn.RemoteAddr()))
  513. if err = sw.AddPeer(peer); err != nil {
  514. conn.Close()
  515. return err
  516. }
  517. return nil
  518. }