You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

591 lines
15 KiB

9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
8 years ago
8 years ago
8 years ago
8 years ago
  1. package p2p
  2. import (
  3. "errors"
  4. "fmt"
  5. "math/rand"
  6. "net"
  7. "time"
  8. . "github.com/tendermint/go-common"
  9. cfg "github.com/tendermint/go-config"
  10. crypto "github.com/tendermint/go-crypto"
  11. "github.com/tendermint/log15"
  12. )
  // reconnectAttempts is the attempt limit used by the reconnect loop in
  // StopPeerForError when a persistent peer is dropped.
  const reconnectAttempts = 30

  // reconnectInterval is how long that loop sleeps after a failed redial
  // before trying again.
  const reconnectInterval = 3 * time.Second
  17. type Reactor interface {
  18. Service // Start, Stop
  19. SetSwitch(*Switch)
  20. GetChannels() []*ChannelDescriptor
  21. AddPeer(peer *Peer)
  22. RemovePeer(peer *Peer, reason interface{})
  23. Receive(chID byte, peer *Peer, msgBytes []byte)
  24. }
  25. //--------------------------------------
  26. type BaseReactor struct {
  27. BaseService // Provides Start, Stop, .Quit
  28. Switch *Switch
  29. }
  30. func NewBaseReactor(log log15.Logger, name string, impl Reactor) *BaseReactor {
  31. return &BaseReactor{
  32. BaseService: *NewBaseService(log, name, impl),
  33. Switch: nil,
  34. }
  35. }
  36. func (br *BaseReactor) SetSwitch(sw *Switch) {
  37. br.Switch = sw
  38. }
  39. func (_ *BaseReactor) GetChannels() []*ChannelDescriptor { return nil }
  40. func (_ *BaseReactor) AddPeer(peer *Peer) {}
  41. func (_ *BaseReactor) RemovePeer(peer *Peer, reason interface{}) {}
  42. func (_ *BaseReactor) Receive(chID byte, peer *Peer, msgBytes []byte) {}
  43. //-----------------------------------------------------------------------------
  44. /*
  45. The `Switch` handles peer connections and exposes an API to receive incoming messages
  46. on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one
  47. or more `Channels`. So while sending outgoing messages is typically performed on the peer,
  48. incoming messages are received on the reactor.
  49. */
  50. type Switch struct {
  51. BaseService
  52. config cfg.Config
  53. listeners []Listener
  54. reactors map[string]Reactor
  55. chDescs []*ChannelDescriptor
  56. reactorsByCh map[byte]Reactor
  57. peers *PeerSet
  58. dialing *CMap
  59. nodeInfo *NodeInfo // our node info
  60. nodePrivKey crypto.PrivKeyEd25519 // our node privkey
  61. filterConnByAddr func(net.Addr) error
  62. filterConnByPubKey func(crypto.PubKeyEd25519) error
  63. }
  // ErrSwitchDuplicatePeer reports an attempt to add a peer that is already
  // known.
  // NOTE(review): not returned anywhere in this file - presumably produced
  // by PeerSet.Add; confirm.
  var ErrSwitchDuplicatePeer = errors.New("Duplicate peer")

  // ErrSwitchMaxPeersPerIPRange reports that the peer's IP range already has
  // too many peers.
  // NOTE(review): not returned anywhere in this file - presumably produced
  // by PeerSet.Add; confirm.
  var ErrSwitchMaxPeersPerIPRange = errors.New("IP range has too many peers")
  68. func NewSwitch(config cfg.Config) *Switch {
  69. setConfigDefaults(config)
  70. sw := &Switch{
  71. config: config,
  72. reactors: make(map[string]Reactor),
  73. chDescs: make([]*ChannelDescriptor, 0),
  74. reactorsByCh: make(map[byte]Reactor),
  75. peers: NewPeerSet(),
  76. dialing: NewCMap(),
  77. nodeInfo: nil,
  78. }
  79. sw.BaseService = *NewBaseService(log, "P2P Switch", sw)
  80. return sw
  81. }
  82. // Not goroutine safe.
  83. func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
  84. // Validate the reactor.
  85. // No two reactors can share the same channel.
  86. reactorChannels := reactor.GetChannels()
  87. for _, chDesc := range reactorChannels {
  88. chID := chDesc.ID
  89. if sw.reactorsByCh[chID] != nil {
  90. PanicSanity(fmt.Sprintf("Channel %X has multiple reactors %v & %v", chID, sw.reactorsByCh[chID], reactor))
  91. }
  92. sw.chDescs = append(sw.chDescs, chDesc)
  93. sw.reactorsByCh[chID] = reactor
  94. }
  95. sw.reactors[name] = reactor
  96. reactor.SetSwitch(sw)
  97. return reactor
  98. }
  99. // Not goroutine safe.
  100. func (sw *Switch) Reactors() map[string]Reactor {
  101. return sw.reactors
  102. }
  103. // Not goroutine safe.
  104. func (sw *Switch) Reactor(name string) Reactor {
  105. return sw.reactors[name]
  106. }
  107. // Not goroutine safe.
  108. func (sw *Switch) AddListener(l Listener) {
  109. sw.listeners = append(sw.listeners, l)
  110. }
  111. // Not goroutine safe.
  112. func (sw *Switch) Listeners() []Listener {
  113. return sw.listeners
  114. }
  115. // Not goroutine safe.
  116. func (sw *Switch) IsListening() bool {
  117. return len(sw.listeners) > 0
  118. }
  119. // Not goroutine safe.
  120. func (sw *Switch) SetNodeInfo(nodeInfo *NodeInfo) {
  121. sw.nodeInfo = nodeInfo
  122. }
  123. // Not goroutine safe.
  124. func (sw *Switch) NodeInfo() *NodeInfo {
  125. return sw.nodeInfo
  126. }
  127. // Not goroutine safe.
  128. // NOTE: Overwrites sw.nodeInfo.PubKey
  129. func (sw *Switch) SetNodePrivKey(nodePrivKey crypto.PrivKeyEd25519) {
  130. sw.nodePrivKey = nodePrivKey
  131. if sw.nodeInfo != nil {
  132. sw.nodeInfo.PubKey = nodePrivKey.PubKey().(crypto.PubKeyEd25519)
  133. }
  134. }
  135. // Switch.Start() starts all the reactors, peers, and listeners.
  136. func (sw *Switch) OnStart() error {
  137. sw.BaseService.OnStart()
  138. // Start reactors
  139. for _, reactor := range sw.reactors {
  140. _, err := reactor.Start()
  141. if err != nil {
  142. return err
  143. }
  144. }
  145. // Start peers
  146. for _, peer := range sw.peers.List() {
  147. sw.startInitPeer(peer)
  148. }
  149. // Start listeners
  150. for _, listener := range sw.listeners {
  151. go sw.listenerRoutine(listener)
  152. }
  153. return nil
  154. }
  155. func (sw *Switch) OnStop() {
  156. sw.BaseService.OnStop()
  157. // Stop listeners
  158. for _, listener := range sw.listeners {
  159. listener.Stop()
  160. }
  161. sw.listeners = nil
  162. // Stop peers
  163. for _, peer := range sw.peers.List() {
  164. peer.Stop()
  165. sw.peers.Remove(peer)
  166. }
  167. // Stop reactors
  168. for _, reactor := range sw.reactors {
  169. reactor.Stop()
  170. }
  171. }
  172. // NOTE: This performs a blocking handshake before the peer is added.
  173. // CONTRACT: If error is returned, peer is nil, and conn is immediately closed.
  174. func (sw *Switch) AddPeer(peer *Peer) error {
  175. if err := sw.FilterConnByAddr(peer.RemoteAddr()); err != nil {
  176. return err
  177. }
  178. if err := sw.FilterConnByPubKey(peer.PubKey()); err != nil {
  179. return err
  180. }
  181. if err := peer.HandshakeTimeout(sw.nodeInfo, time.Duration(sw.config.GetInt(configKeyHandshakeTimeoutSeconds))*time.Second); err != nil {
  182. return err
  183. }
  184. // Avoid self
  185. if sw.nodeInfo.PubKey.Equals(peer.PubKey()) {
  186. return errors.New("Ignoring connection from self")
  187. }
  188. // Check version, chain id
  189. if err := sw.nodeInfo.CompatibleWith(peer.NodeInfo); err != nil {
  190. return err
  191. }
  192. // Add the peer to .peers
  193. // ignore if duplicate or if we already have too many for that IP range
  194. if err := sw.peers.Add(peer); err != nil {
  195. log.Notice("Ignoring peer", "error", err, "peer", peer)
  196. peer.Stop()
  197. return err
  198. }
  199. // Start peer
  200. if sw.IsRunning() {
  201. sw.startInitPeer(peer)
  202. }
  203. log.Notice("Added peer", "peer", peer)
  204. return nil
  205. }
  206. func (sw *Switch) FilterConnByAddr(addr net.Addr) error {
  207. if sw.filterConnByAddr != nil {
  208. return sw.filterConnByAddr(addr)
  209. }
  210. return nil
  211. }
  212. func (sw *Switch) FilterConnByPubKey(pubkey crypto.PubKeyEd25519) error {
  213. if sw.filterConnByPubKey != nil {
  214. return sw.filterConnByPubKey(pubkey)
  215. }
  216. return nil
  217. }
  218. func (sw *Switch) SetAddrFilter(f func(net.Addr) error) {
  219. sw.filterConnByAddr = f
  220. }
  221. func (sw *Switch) SetPubKeyFilter(f func(crypto.PubKeyEd25519) error) {
  222. sw.filterConnByPubKey = f
  223. }
  224. func (sw *Switch) startInitPeer(peer *Peer) {
  225. peer.Start() // spawn send/recv routines
  226. for _, reactor := range sw.reactors {
  227. reactor.AddPeer(peer)
  228. }
  229. }
  230. // Dial a list of seeds asynchronously in random order
  231. func (sw *Switch) DialSeeds(addrBook *AddrBook, seeds []string) error {
  232. netAddrs, err := NewNetAddressStrings(seeds)
  233. if err != nil {
  234. return err
  235. }
  236. if addrBook != nil {
  237. // add seeds to `addrBook`
  238. ourAddrS := sw.nodeInfo.ListenAddr
  239. ourAddr, _ := NewNetAddressString(ourAddrS)
  240. for _, netAddr := range netAddrs {
  241. // do not add ourselves
  242. if netAddr.Equals(ourAddr) {
  243. continue
  244. }
  245. addrBook.AddAddress(netAddr, ourAddr)
  246. }
  247. addrBook.Save()
  248. }
  249. // permute the list, dial them in random order.
  250. perm := rand.Perm(len(netAddrs))
  251. for i := 0; i < len(perm); i++ {
  252. go func(i int) {
  253. time.Sleep(time.Duration(rand.Int63n(3000)) * time.Millisecond)
  254. j := perm[i]
  255. sw.dialSeed(netAddrs[j])
  256. }(i)
  257. }
  258. return nil
  259. }
  260. func (sw *Switch) dialSeed(addr *NetAddress) {
  261. peer, err := sw.DialPeerWithAddress(addr, true)
  262. if err != nil {
  263. log.Error("Error dialing seed", "error", err)
  264. return
  265. } else {
  266. log.Notice("Connected to seed", "peer", peer)
  267. }
  268. }
  269. func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) (*Peer, error) {
  270. sw.dialing.Set(addr.IP.String(), addr)
  271. defer sw.dialing.Delete(addr.IP.String())
  272. peer, err := newOutboundPeerWithConfig(addr, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, peerConfigFromGoConfig(sw.config))
  273. if err != nil {
  274. log.Info("Failed dialing peer", "address", addr, "error", err)
  275. return nil, err
  276. }
  277. if persistent {
  278. peer.makePersistent()
  279. }
  280. err = sw.AddPeer(peer)
  281. if err != nil {
  282. log.Info("Failed adding peer", "address", addr, "error", err)
  283. peer.CloseConn()
  284. return nil, err
  285. }
  286. log.Notice("Dialed and added peer", "address", addr, "peer", peer)
  287. return peer, nil
  288. }
  289. func (sw *Switch) IsDialing(addr *NetAddress) bool {
  290. return sw.dialing.Has(addr.IP.String())
  291. }
  292. // Broadcast runs a go routine for each attempted send, which will block
  293. // trying to send for defaultSendTimeoutSeconds. Returns a channel
  294. // which receives success values for each attempted send (false if times out)
  295. // NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
  296. func (sw *Switch) Broadcast(chID byte, msg interface{}) chan bool {
  297. successChan := make(chan bool, len(sw.peers.List()))
  298. log.Debug("Broadcast", "channel", chID, "msg", msg)
  299. for _, peer := range sw.peers.List() {
  300. go func(peer *Peer) {
  301. success := peer.Send(chID, msg)
  302. successChan <- success
  303. }(peer)
  304. }
  305. return successChan
  306. }
  307. // Returns the count of outbound/inbound and outbound-dialing peers.
  308. func (sw *Switch) NumPeers() (outbound, inbound, dialing int) {
  309. peers := sw.peers.List()
  310. for _, peer := range peers {
  311. if peer.outbound {
  312. outbound++
  313. } else {
  314. inbound++
  315. }
  316. }
  317. dialing = sw.dialing.Size()
  318. return
  319. }
  320. func (sw *Switch) Peers() IPeerSet {
  321. return sw.peers
  322. }
  323. // Disconnect from a peer due to external error, retry if it is a persistent peer.
  324. // TODO: make record depending on reason.
  325. func (sw *Switch) StopPeerForError(peer *Peer, reason interface{}) {
  326. addr := NewNetAddress(peer.RemoteAddr())
  327. log.Notice("Stopping peer for error", "peer", peer, "error", reason)
  328. sw.stopAndRemovePeer(peer, reason)
  329. if peer.IsPersistent() {
  330. go func() {
  331. log.Notice("Reconnecting to peer", "peer", peer)
  332. for i := 1; i < reconnectAttempts; i++ {
  333. if !sw.IsRunning() {
  334. return
  335. }
  336. peer, err := sw.DialPeerWithAddress(addr, true)
  337. if err != nil {
  338. if i == reconnectAttempts {
  339. log.Notice("Error reconnecting to peer. Giving up", "tries", i, "error", err)
  340. return
  341. }
  342. log.Notice("Error reconnecting to peer. Trying again", "tries", i, "error", err)
  343. time.Sleep(reconnectInterval)
  344. continue
  345. }
  346. log.Notice("Reconnected to peer", "peer", peer)
  347. return
  348. }
  349. }()
  350. }
  351. }
  352. // Disconnect from a peer gracefully.
  353. // TODO: handle graceful disconnects.
  354. func (sw *Switch) StopPeerGracefully(peer *Peer) {
  355. log.Notice("Stopping peer gracefully")
  356. sw.stopAndRemovePeer(peer, nil)
  357. }
  358. func (sw *Switch) stopAndRemovePeer(peer *Peer, reason interface{}) {
  359. sw.peers.Remove(peer)
  360. peer.Stop()
  361. for _, reactor := range sw.reactors {
  362. reactor.RemovePeer(peer, reason)
  363. }
  364. }
  365. func (sw *Switch) listenerRoutine(l Listener) {
  366. for {
  367. inConn, ok := <-l.Connections()
  368. if !ok {
  369. break
  370. }
  371. // ignore connection if we already have enough
  372. maxPeers := sw.config.GetInt(configKeyMaxNumPeers)
  373. if maxPeers <= sw.peers.Size() {
  374. log.Info("Ignoring inbound connection: already have enough peers", "address", inConn.RemoteAddr().String(), "numPeers", sw.peers.Size(), "max", maxPeers)
  375. continue
  376. }
  377. // New inbound connection!
  378. err := sw.addPeerWithConnectionAndConfig(inConn, peerConfigFromGoConfig(sw.config))
  379. if err != nil {
  380. log.Notice("Ignoring inbound connection: error while adding peer", "address", inConn.RemoteAddr().String(), "error", err)
  381. continue
  382. }
  383. // NOTE: We don't yet have the listening port of the
  384. // remote (if they have a listener at all).
  385. // The peerHandshake will handle that
  386. }
  387. // cleanup
  388. }
  389. //-----------------------------------------------------------------------------
  390. type SwitchEventNewPeer struct {
  391. Peer *Peer
  392. }
  393. type SwitchEventDonePeer struct {
  394. Peer *Peer
  395. Error interface{}
  396. }
  397. //------------------------------------------------------------------
  398. // Switches connected via arbitrary net.Conn; useful for testing
  399. // Returns n switches, connected according to the connect func.
  400. // If connect==Connect2Switches, the switches will be fully connected.
  401. // initSwitch defines how the ith switch should be initialized (ie. with what reactors).
  402. // NOTE: panics if any switch fails to start.
  403. func MakeConnectedSwitches(n int, initSwitch func(int, *Switch) *Switch, connect func([]*Switch, int, int)) []*Switch {
  404. switches := make([]*Switch, n)
  405. for i := 0; i < n; i++ {
  406. switches[i] = makeSwitch(i, "testing", "123.123.123", initSwitch)
  407. }
  408. if err := StartSwitches(switches); err != nil {
  409. panic(err)
  410. }
  411. for i := 0; i < n; i++ {
  412. for j := i; j < n; j++ {
  413. connect(switches, i, j)
  414. }
  415. }
  416. return switches
  417. }
  // PanicOnAddPeerErr makes Connect2Switches panic when adding a peer fails
  // instead of silently ignoring the error. It defaults to false.
  var PanicOnAddPeerErr bool
  419. // Will connect switches i and j via net.Pipe()
  420. // Blocks until a conection is established.
  421. // NOTE: caller ensures i and j are within bounds
  422. func Connect2Switches(switches []*Switch, i, j int) {
  423. switchI := switches[i]
  424. switchJ := switches[j]
  425. c1, c2 := net.Pipe()
  426. doneCh := make(chan struct{})
  427. go func() {
  428. err := switchI.addPeerWithConnection(c1)
  429. if PanicOnAddPeerErr && err != nil {
  430. panic(err)
  431. }
  432. doneCh <- struct{}{}
  433. }()
  434. go func() {
  435. err := switchJ.addPeerWithConnection(c2)
  436. if PanicOnAddPeerErr && err != nil {
  437. panic(err)
  438. }
  439. doneCh <- struct{}{}
  440. }()
  441. <-doneCh
  442. <-doneCh
  443. }
  444. func StartSwitches(switches []*Switch) error {
  445. for _, s := range switches {
  446. _, err := s.Start() // start switch and reactors
  447. if err != nil {
  448. return err
  449. }
  450. }
  451. return nil
  452. }
  453. func makeSwitch(i int, network, version string, initSwitch func(int, *Switch) *Switch) *Switch {
  454. privKey := crypto.GenPrivKeyEd25519()
  455. // new switch, add reactors
  456. // TODO: let the config be passed in?
  457. s := initSwitch(i, NewSwitch(cfg.NewMapConfig(nil)))
  458. s.SetNodeInfo(&NodeInfo{
  459. PubKey: privKey.PubKey().(crypto.PubKeyEd25519),
  460. Moniker: Fmt("switch%d", i),
  461. Network: network,
  462. Version: version,
  463. })
  464. s.SetNodePrivKey(privKey)
  465. return s
  466. }
  467. func (sw *Switch) addPeerWithConnection(conn net.Conn) error {
  468. peer, err := newInboundPeer(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey)
  469. if err != nil {
  470. conn.Close()
  471. return err
  472. }
  473. if err = sw.AddPeer(peer); err != nil {
  474. conn.Close()
  475. return err
  476. }
  477. return nil
  478. }
  479. func (sw *Switch) addPeerWithConnectionAndConfig(conn net.Conn, config *PeerConfig) error {
  480. peer, err := newInboundPeerWithConfig(conn, sw.reactorsByCh, sw.chDescs, sw.StopPeerForError, sw.nodePrivKey, config)
  481. if err != nil {
  482. conn.Close()
  483. return err
  484. }
  485. if err = sw.AddPeer(peer); err != nil {
  486. conn.Close()
  487. return err
  488. }
  489. return nil
  490. }
  491. func peerConfigFromGoConfig(config cfg.Config) *PeerConfig {
  492. return &PeerConfig{
  493. AuthEnc: config.GetBool(configKeyAuthEnc),
  494. Fuzz: config.GetBool(configFuzzEnable),
  495. HandshakeTimeout: time.Duration(config.GetInt(configKeyHandshakeTimeoutSeconds)) * time.Second,
  496. DialTimeout: time.Duration(config.GetInt(configKeyDialTimeoutSeconds)) * time.Second,
  497. MConfig: &MConnConfig{
  498. SendRate: int64(config.GetInt(configKeySendRate)),
  499. RecvRate: int64(config.GetInt(configKeyRecvRate)),
  500. },
  501. FuzzConfig: &FuzzConnConfig{
  502. Mode: config.GetInt(configFuzzMode),
  503. MaxDelay: time.Duration(config.GetInt(configFuzzMaxDelayMilliseconds)) * time.Millisecond,
  504. ProbDropRW: config.GetFloat64(configFuzzProbDropRW),
  505. ProbDropConn: config.GetFloat64(configFuzzProbDropConn),
  506. ProbSleep: config.GetFloat64(configFuzzProbSleep),
  507. },
  508. }
  509. }