You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

715 lines
20 KiB

9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
9 years ago
9 years ago
8 years ago
7 years ago
9 years ago
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
8 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
8 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
7 years ago
p2p: introduce peerConn to simplify peer creation (#1226) * expose AuthEnc in the P2P config if AuthEnc is true, dialed peers must have a node ID in the address and it must match the persistent pubkey from the secret handshake. Refs #1157 * fixes after my own review * fix docs * fix build failure ``` p2p/pex/pex_reactor_test.go:288:88: cannot use seed.NodeInfo().NetAddress() (type *p2p.NetAddress) as type string in array or slice literal ``` * p2p: introduce peerConn to simplify peer creation * Introduce `peerConn` containing the known fields of `peer` * `peer` only created in `sw.addPeer` once handshake is complete and NodeInfo is checked * Eliminates some mutable variables and makes the code flow better * Simplifies the `newXxxPeer` funcs * Use ID instead of PubKey where possible. * SetPubKeyFilter -> SetIDFilter * nodeInfo.Validate takes ID * remove peer.PubKey() * persistent node ids * fixes from review * test: use ip_plus_id.sh more * fix invalid memory panic during fast_sync test ``` 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: panic: runtime error: invalid memory address or nil pointer dereference 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x98dd3e] 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: goroutine 3432 [running]: 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.newOutboundPeerConn(0xc423fd1380, 0xc420933e00, 0x1, 0x1239a60, 0 xc420128c40, 0x2, 0x42caf6, 0xc42001f300, 0xc422831d98, 0xc4227951c0, ...) 
2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/peer.go:123 +0x31e 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).addOutboundPeerWithConfig(0xc4200ad040, 0xc423fd1380, 0 xc420933e00, 0xc423f48801, 0x28, 0x2) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:455 +0x12b 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).DialPeerWithAddress(0xc4200ad040, 0xc423fd1380, 0x1, 0x 0, 0x0) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:371 +0xdc 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).reconnectToPeer(0xc4200ad040, 0x123e000, 0xc42007bb00) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:290 +0x25f 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: created by github.com/tendermint/tendermint/p2p.(*Switch).StopPeerForError 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:256 +0x1b7 ```
7 years ago
p2p: introduce peerConn to simplify peer creation (#1226) * expose AuthEnc in the P2P config if AuthEnc is true, dialed peers must have a node ID in the address and it must match the persistent pubkey from the secret handshake. Refs #1157 * fixes after my own review * fix docs * fix build failure ``` p2p/pex/pex_reactor_test.go:288:88: cannot use seed.NodeInfo().NetAddress() (type *p2p.NetAddress) as type string in array or slice literal ``` * p2p: introduce peerConn to simplify peer creation * Introduce `peerConn` containing the known fields of `peer` * `peer` only created in `sw.addPeer` once handshake is complete and NodeInfo is checked * Eliminates some mutable variables and makes the code flow better * Simplifies the `newXxxPeer` funcs * Use ID instead of PubKey where possible. * SetPubKeyFilter -> SetIDFilter * nodeInfo.Validate takes ID * remove peer.PubKey() * persistent node ids * fixes from review * test: use ip_plus_id.sh more * fix invalid memory panic during fast_sync test ``` 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: panic: runtime error: invalid memory address or nil pointer dereference 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x98dd3e] 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: goroutine 3432 [running]: 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.newOutboundPeerConn(0xc423fd1380, 0xc420933e00, 0x1, 0x1239a60, 0 xc420128c40, 0x2, 0x42caf6, 0xc42001f300, 0xc422831d98, 0xc4227951c0, ...) 
2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/peer.go:123 +0x31e 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).addOutboundPeerWithConfig(0xc4200ad040, 0xc423fd1380, 0 xc420933e00, 0xc423f48801, 0x28, 0x2) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:455 +0x12b 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).DialPeerWithAddress(0xc4200ad040, 0xc423fd1380, 0x1, 0x 0, 0x0) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:371 +0xdc 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).reconnectToPeer(0xc4200ad040, 0x123e000, 0xc42007bb00) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:290 +0x25f 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: created by github.com/tendermint/tendermint/p2p.(*Switch).StopPeerForError 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:256 +0x1b7 ```
7 years ago
p2p: introduce peerConn to simplify peer creation (#1226) * expose AuthEnc in the P2P config if AuthEnc is true, dialed peers must have a node ID in the address and it must match the persistent pubkey from the secret handshake. Refs #1157 * fixes after my own review * fix docs * fix build failure ``` p2p/pex/pex_reactor_test.go:288:88: cannot use seed.NodeInfo().NetAddress() (type *p2p.NetAddress) as type string in array or slice literal ``` * p2p: introduce peerConn to simplify peer creation * Introduce `peerConn` containing the known fields of `peer` * `peer` only created in `sw.addPeer` once handshake is complete and NodeInfo is checked * Eliminates some mutable variables and makes the code flow better * Simplifies the `newXxxPeer` funcs * Use ID instead of PubKey where possible. * SetPubKeyFilter -> SetIDFilter * nodeInfo.Validate takes ID * remove peer.PubKey() * persistent node ids * fixes from review * test: use ip_plus_id.sh more * fix invalid memory panic during fast_sync test ``` 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: panic: runtime error: invalid memory address or nil pointer dereference 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x98dd3e] 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: goroutine 3432 [running]: 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.newOutboundPeerConn(0xc423fd1380, 0xc420933e00, 0x1, 0x1239a60, 0 xc420128c40, 0x2, 0x42caf6, 0xc42001f300, 0xc422831d98, 0xc4227951c0, ...) 
2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/peer.go:123 +0x31e 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).addOutboundPeerWithConfig(0xc4200ad040, 0xc423fd1380, 0 xc420933e00, 0xc423f48801, 0x28, 0x2) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:455 +0x12b 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).DialPeerWithAddress(0xc4200ad040, 0xc423fd1380, 0x1, 0x 0, 0x0) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:371 +0xdc 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: github.com/tendermint/tendermint/p2p.(*Switch).reconnectToPeer(0xc4200ad040, 0x123e000, 0xc42007bb00) 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:290 +0x25f 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: created by github.com/tendermint/tendermint/p2p.(*Switch).StopPeerForError 2018-02-21T06:30:05Z box887.localdomain docker/local_testnet_4[14907]: #011/go/src/github.com/tendermint/tendermint/p2p/switch.go:256 +0x1b7 ```
7 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
  1. package pex
  2. import (
  3. "fmt"
  4. "reflect"
  5. "sort"
  6. "sync"
  7. "time"
  8. "github.com/pkg/errors"
  9. amino "github.com/tendermint/go-amino"
  10. cmn "github.com/tendermint/tendermint/libs/common"
  11. "github.com/tendermint/tendermint/p2p"
  12. "github.com/tendermint/tendermint/p2p/conn"
  13. )
// Peer is a package-local alias for p2p.Peer so the reactor code can refer to
// peers without the package qualifier.
type Peer = p2p.Peer
const (
	// PexChannel is a channel for PEX messages
	PexChannel = byte(0x00)

	// maxAddressSize is an over-estimate of the max NetAddress size:
	// hexID (40) + IP (16) + Port (2) + Name (100) ...
	// NOTE: don't use a massive DNS name.
	maxAddressSize = 256

	// maxMsgSize bounds a single PEX message (enforced in decodeMsg).
	// NOTE: amplification factor!
	// A small request results in a response of up to maxMsgSize.
	maxMsgSize = maxAddressSize * maxGetSelection

	// defaultEnsurePeersPeriod is the default interval at which we ensure we
	// have enough peers.
	defaultEnsurePeersPeriod = 30 * time.Second

	// Seed/Crawler constants

	// We want seeds to only advertise good peers. Therefore they should wait at
	// least as long as we expect it to take for a peer to become good before
	// disconnecting.
	// see consensus/reactor.go: blocksToContributeToBecomeGoodPeer
	// 10000 blocks assuming 1s blocks ~ 2.7 hours.
	defaultSeedDisconnectWaitPeriod = 3 * time.Hour

	defaultCrawlPeerInterval = 2 * time.Minute // don't redial for this. TODO: back-off. what for?

	defaultCrawlPeersPeriod = 30 * time.Second // check some peers every this

	maxAttemptsToDial = 16 // ~ 35h in total (last attempt - 18h)

	// if node connects to seed, it does not have any trusted peers.
	// Especially in the beginning, node should have more trusted peers than
	// untrusted.
	biasToSelectNewPeers = 30 // 70 to select good peers
)
// PEXReactor handles PEX (peer exchange) and ensures that an
// adequate number of peers are connected to the switch.
//
// It uses `AddrBook` (address book) to store `NetAddress`es of the peers.
//
// ## Preventing abuse
//
// Only accept pexAddrsMsg from peers we sent a corresponding pexRequestMsg to.
// Only accept one pexRequestMsg every ~defaultEnsurePeersPeriod.
type PEXReactor struct {
	p2p.BaseReactor

	book              AddrBook
	config            *PEXReactorConfig
	ensurePeersPeriod time.Duration // TODO: should go in the config

	// maps to prevent abuse
	requestsSent         *cmn.CMap // ID->struct{}: unanswered send requests
	lastReceivedRequests *cmn.CMap // ID->time.Time: last time peer requested from us

	// seedAddrs holds the seed addresses returned by checkSeeds in OnStart.
	seedAddrs []*p2p.NetAddress

	attemptsToDial sync.Map // address (string) -> {number of attempts (int), last time dialed (time.Time)}
}
  62. func (r *PEXReactor) minReceiveRequestInterval() time.Duration {
  63. // NOTE: must be less than ensurePeersPeriod, otherwise we'll request
  64. // peers too quickly from others and they'll think we're bad!
  65. return r.ensurePeersPeriod / 3
  66. }
// PEXReactorConfig holds reactor specific configuration data.
type PEXReactorConfig struct {
	// SeedMode selects Seed/Crawler mode: when true, OnStart launches the
	// crawl routine instead of the ensure-peers routine.
	SeedMode bool

	// Seeds is a list of addresses reactor may use
	// if it can't connect to peers in the addrbook.
	Seeds []string
}
// _attemptsToDial is the value stored in PEXReactor.attemptsToDial for one
// address: how many dial attempts were recorded and when the last one happened.
type _attemptsToDial struct {
	number     int       // number of recorded dial attempts
	lastDialed time.Time // time the latest attempt was recorded
}
  79. // NewPEXReactor creates new PEX reactor.
  80. func NewPEXReactor(b AddrBook, config *PEXReactorConfig) *PEXReactor {
  81. r := &PEXReactor{
  82. book: b,
  83. config: config,
  84. ensurePeersPeriod: defaultEnsurePeersPeriod,
  85. requestsSent: cmn.NewCMap(),
  86. lastReceivedRequests: cmn.NewCMap(),
  87. }
  88. r.BaseReactor = *p2p.NewBaseReactor("PEXReactor", r)
  89. return r
  90. }
  91. // OnStart implements BaseService
  92. func (r *PEXReactor) OnStart() error {
  93. err := r.book.Start()
  94. if err != nil && err != cmn.ErrAlreadyStarted {
  95. return err
  96. }
  97. numOnline, seedAddrs, err := r.checkSeeds()
  98. if err != nil {
  99. return err
  100. } else if numOnline == 0 && r.book.Empty() {
  101. return errors.New("Address book is empty, and could not connect to any seed nodes")
  102. }
  103. r.seedAddrs = seedAddrs
  104. // Check if this node should run
  105. // in seed/crawler mode
  106. if r.config.SeedMode {
  107. go r.crawlPeersRoutine()
  108. } else {
  109. go r.ensurePeersRoutine()
  110. }
  111. return nil
  112. }
// OnStop implements BaseService by stopping the address book.
func (r *PEXReactor) OnStop() {
	r.book.Stop()
}
  117. // GetChannels implements Reactor
  118. func (r *PEXReactor) GetChannels() []*conn.ChannelDescriptor {
  119. return []*conn.ChannelDescriptor{
  120. {
  121. ID: PexChannel,
  122. Priority: 1,
  123. SendQueueCapacity: 10,
  124. },
  125. }
  126. }
  127. // AddPeer implements Reactor by adding peer to the address book (if inbound)
  128. // or by requesting more addresses (if outbound).
  129. func (r *PEXReactor) AddPeer(p Peer) {
  130. if p.IsOutbound() {
  131. // For outbound peers, the address is already in the books -
  132. // either via DialPeersAsync or r.Receive.
  133. // Ask it for more peers if we need.
  134. if r.book.NeedMoreAddrs() {
  135. r.RequestAddrs(p)
  136. }
  137. } else {
  138. // inbound peer is its own source
  139. addr := p.NodeInfo().NetAddress()
  140. src := addr
  141. // add to book. dont RequestAddrs right away because
  142. // we don't trust inbound as much - let ensurePeersRoutine handle it.
  143. err := r.book.AddAddress(addr, src)
  144. r.logErrAddrBook(err)
  145. }
  146. }
  147. func (r *PEXReactor) logErrAddrBook(err error) {
  148. if err != nil {
  149. switch err.(type) {
  150. case ErrAddrBookNilAddr:
  151. r.Logger.Error("Failed to add new address", "err", err)
  152. default:
  153. // non-routable, self, full book, private, etc.
  154. r.Logger.Debug("Failed to add new address", "err", err)
  155. }
  156. }
  157. }
  158. // RemovePeer implements Reactor.
  159. func (r *PEXReactor) RemovePeer(p Peer, reason interface{}) {
  160. id := string(p.ID())
  161. r.requestsSent.Delete(id)
  162. r.lastReceivedRequests.Delete(id)
  163. }
  164. // Receive implements Reactor by handling incoming PEX messages.
  165. func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) {
  166. msg, err := decodeMsg(msgBytes)
  167. if err != nil {
  168. r.Logger.Error("Error decoding message", "src", src, "chId", chID, "msg", msg, "err", err, "bytes", msgBytes)
  169. r.Switch.StopPeerForError(src, err)
  170. return
  171. }
  172. r.Logger.Debug("Received message", "src", src, "chId", chID, "msg", msg)
  173. switch msg := msg.(type) {
  174. case *pexRequestMessage:
  175. // NOTE: this is a prime candidate for amplification attacks,
  176. // so it's important we
  177. // 1) restrict how frequently peers can request
  178. // 2) limit the output size
  179. // If we're a seed and this is an inbound peer,
  180. // respond once and disconnect.
  181. if r.config.SeedMode && !src.IsOutbound() {
  182. id := string(src.ID())
  183. v := r.lastReceivedRequests.Get(id)
  184. if v != nil {
  185. // FlushStop/StopPeer are already
  186. // running in a go-routine.
  187. return
  188. }
  189. r.lastReceivedRequests.Set(id, time.Now())
  190. // Send addrs and disconnect
  191. r.SendAddrs(src, r.book.GetSelectionWithBias(biasToSelectNewPeers))
  192. go func() {
  193. // In a go-routine so it doesn't block .Receive.
  194. src.FlushStop()
  195. r.Switch.StopPeerGracefully(src)
  196. }()
  197. } else {
  198. // Check we're not receiving requests too frequently.
  199. if err := r.receiveRequest(src); err != nil {
  200. r.Switch.StopPeerForError(src, err)
  201. return
  202. }
  203. r.SendAddrs(src, r.book.GetSelection())
  204. }
  205. case *pexAddrsMessage:
  206. // If we asked for addresses, add them to the book
  207. if err := r.ReceiveAddrs(msg.Addrs, src); err != nil {
  208. r.Switch.StopPeerForError(src, err)
  209. return
  210. }
  211. default:
  212. r.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
  213. }
  214. }
  215. // enforces a minimum amount of time between requests
  216. func (r *PEXReactor) receiveRequest(src Peer) error {
  217. id := string(src.ID())
  218. v := r.lastReceivedRequests.Get(id)
  219. if v == nil {
  220. // initialize with empty time
  221. lastReceived := time.Time{}
  222. r.lastReceivedRequests.Set(id, lastReceived)
  223. return nil
  224. }
  225. lastReceived := v.(time.Time)
  226. if lastReceived.Equal(time.Time{}) {
  227. // first time gets a free pass. then we start tracking the time
  228. lastReceived = time.Now()
  229. r.lastReceivedRequests.Set(id, lastReceived)
  230. return nil
  231. }
  232. now := time.Now()
  233. minInterval := r.minReceiveRequestInterval()
  234. if now.Sub(lastReceived) < minInterval {
  235. return fmt.Errorf("Peer (%v) sent next PEX request too soon. lastReceived: %v, now: %v, minInterval: %v. Disconnecting",
  236. src.ID(),
  237. lastReceived,
  238. now,
  239. minInterval,
  240. )
  241. }
  242. r.lastReceivedRequests.Set(id, now)
  243. return nil
  244. }
  245. // RequestAddrs asks peer for more addresses if we do not already
  246. // have a request out for this peer.
  247. func (r *PEXReactor) RequestAddrs(p Peer) {
  248. r.Logger.Debug("Request addrs", "from", p)
  249. id := string(p.ID())
  250. if r.requestsSent.Has(id) {
  251. return
  252. }
  253. r.requestsSent.Set(id, struct{}{})
  254. p.Send(PexChannel, cdc.MustMarshalBinaryBare(&pexRequestMessage{}))
  255. }
  256. // ReceiveAddrs adds the given addrs to the addrbook if theres an open
  257. // request for this peer and deletes the open request.
  258. // If there's no open request for the src peer, it returns an error.
  259. func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error {
  260. id := string(src.ID())
  261. if !r.requestsSent.Has(id) {
  262. return errors.New("Unsolicited pexAddrsMessage")
  263. }
  264. r.requestsSent.Delete(id)
  265. srcAddr := src.NodeInfo().NetAddress()
  266. for _, netAddr := range addrs {
  267. // Validate netAddr. Disconnect from a peer if it sends us invalid data.
  268. if netAddr == nil {
  269. return errors.New("nil address in pexAddrsMessage")
  270. }
  271. // TODO: extract validating logic from NewNetAddressStringWithOptionalID
  272. // and put it in netAddr#Valid (#2722)
  273. na, err := p2p.NewNetAddressString(netAddr.String())
  274. if err != nil {
  275. return fmt.Errorf("%s address in pexAddrsMessage is invalid: %v",
  276. netAddr.String(),
  277. err,
  278. )
  279. }
  280. // NOTE: we check netAddr validity and routability in book#AddAddress.
  281. err = r.book.AddAddress(na, srcAddr)
  282. if err != nil {
  283. r.logErrAddrBook(err)
  284. // XXX: should we be strict about incoming data and disconnect from a
  285. // peer here too?
  286. continue
  287. }
  288. // If this address came from a seed node, try to connect to it without
  289. // waiting.
  290. for _, seedAddr := range r.seedAddrs {
  291. if seedAddr.Equals(srcAddr) {
  292. r.ensurePeers()
  293. }
  294. }
  295. }
  296. return nil
  297. }
  298. // SendAddrs sends addrs to the peer.
  299. func (r *PEXReactor) SendAddrs(p Peer, netAddrs []*p2p.NetAddress) {
  300. p.Send(PexChannel, cdc.MustMarshalBinaryBare(&pexAddrsMessage{Addrs: netAddrs}))
  301. }
// SetEnsurePeersPeriod sets period to ensure peers connected.
// The value is read by ensurePeersRoutine (start-up jitter and ticker period)
// and by minReceiveRequestInterval.
func (r *PEXReactor) SetEnsurePeersPeriod(d time.Duration) {
	r.ensurePeersPeriod = d
}
  306. // Ensures that sufficient peers are connected. (continuous)
  307. func (r *PEXReactor) ensurePeersRoutine() {
  308. var (
  309. seed = cmn.NewRand()
  310. jitter = seed.Int63n(r.ensurePeersPeriod.Nanoseconds())
  311. )
  312. // Randomize first round of communication to avoid thundering herd.
  313. // If no potential peers are present directly start connecting so we guarantee
  314. // swift setup with the help of configured seeds.
  315. if r.hasPotentialPeers() {
  316. time.Sleep(time.Duration(jitter))
  317. }
  318. // fire once immediately.
  319. // ensures we dial the seeds right away if the book is empty
  320. r.ensurePeers()
  321. // fire periodically
  322. ticker := time.NewTicker(r.ensurePeersPeriod)
  323. for {
  324. select {
  325. case <-ticker.C:
  326. r.ensurePeers()
  327. case <-r.Quit():
  328. ticker.Stop()
  329. return
  330. }
  331. }
  332. }
  333. // ensurePeers ensures that sufficient peers are connected. (once)
  334. //
  335. // heuristic that we haven't perfected yet, or, perhaps is manually edited by
  336. // the node operator. It should not be used to compute what addresses are
  337. // already connected or not.
  338. func (r *PEXReactor) ensurePeers() {
  339. var (
  340. out, in, dial = r.Switch.NumPeers()
  341. numToDial = r.Switch.MaxNumOutboundPeers() - (out + dial)
  342. )
  343. r.Logger.Info(
  344. "Ensure peers",
  345. "numOutPeers", out,
  346. "numInPeers", in,
  347. "numDialing", dial,
  348. "numToDial", numToDial,
  349. )
  350. if numToDial <= 0 {
  351. return
  352. }
  353. // bias to prefer more vetted peers when we have fewer connections.
  354. // not perfect, but somewhate ensures that we prioritize connecting to more-vetted
  355. // NOTE: range here is [10, 90]. Too high ?
  356. newBias := cmn.MinInt(out, 8)*10 + 10
  357. toDial := make(map[p2p.ID]*p2p.NetAddress)
  358. // Try maxAttempts times to pick numToDial addresses to dial
  359. maxAttempts := numToDial * 3
  360. for i := 0; i < maxAttempts && len(toDial) < numToDial; i++ {
  361. try := r.book.PickAddress(newBias)
  362. if try == nil {
  363. continue
  364. }
  365. if _, selected := toDial[try.ID]; selected {
  366. continue
  367. }
  368. if r.Switch.IsDialingOrExistingAddress(try) {
  369. continue
  370. }
  371. // TODO: consider moving some checks from toDial into here
  372. // so we don't even consider dialing peers that we want to wait
  373. // before dialling again, or have dialed too many times already
  374. r.Logger.Info("Will dial address", "addr", try)
  375. toDial[try.ID] = try
  376. }
  377. // Dial picked addresses
  378. for _, addr := range toDial {
  379. go r.dialPeer(addr)
  380. }
  381. // If we need more addresses, pick a random peer and ask for more.
  382. if r.book.NeedMoreAddrs() {
  383. peers := r.Switch.Peers().List()
  384. peersCount := len(peers)
  385. if peersCount > 0 {
  386. peer := peers[cmn.RandInt()%peersCount] // nolint: gas
  387. r.Logger.Info("We need more addresses. Sending pexRequest to random peer", "peer", peer)
  388. r.RequestAddrs(peer)
  389. }
  390. }
  391. // If we are not connected to nor dialing anybody, fallback to dialing a seed.
  392. if out+in+dial+len(toDial) == 0 {
  393. r.Logger.Info("No addresses to dial nor connected peers. Falling back to seeds")
  394. r.dialSeeds()
  395. }
  396. }
  397. func (r *PEXReactor) dialAttemptsInfo(addr *p2p.NetAddress) (attempts int, lastDialed time.Time) {
  398. _attempts, ok := r.attemptsToDial.Load(addr.DialString())
  399. if !ok {
  400. return
  401. }
  402. atd := _attempts.(_attemptsToDial)
  403. return atd.number, atd.lastDialed
  404. }
  405. func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) {
  406. attempts, lastDialed := r.dialAttemptsInfo(addr)
  407. if attempts > maxAttemptsToDial {
  408. // Do not log the message if the addr gets readded.
  409. if attempts+1 == maxAttemptsToDial {
  410. r.Logger.Info("Reached max attempts to dial", "addr", addr, "attempts", attempts)
  411. r.attemptsToDial.Store(addr.DialString(), _attemptsToDial{attempts + 1, time.Now()})
  412. }
  413. r.book.MarkBad(addr)
  414. return
  415. }
  416. // exponential backoff if it's not our first attempt to dial given address
  417. if attempts > 0 {
  418. jitterSeconds := time.Duration(cmn.RandFloat64() * float64(time.Second)) // 1s == (1e9 ns)
  419. backoffDuration := jitterSeconds + ((1 << uint(attempts)) * time.Second)
  420. sinceLastDialed := time.Since(lastDialed)
  421. if sinceLastDialed < backoffDuration {
  422. r.Logger.Debug("Too early to dial", "addr", addr, "backoff_duration", backoffDuration, "last_dialed", lastDialed, "time_since", sinceLastDialed)
  423. return
  424. }
  425. }
  426. err := r.Switch.DialPeerWithAddress(addr, false)
  427. if err != nil {
  428. r.Logger.Error("Dialing failed", "addr", addr, "err", err, "attempts", attempts)
  429. // TODO: detect more "bad peer" scenarios
  430. if _, ok := err.(p2p.ErrSwitchAuthenticationFailure); ok {
  431. r.book.MarkBad(addr)
  432. r.attemptsToDial.Delete(addr.DialString())
  433. } else {
  434. r.book.MarkAttempt(addr)
  435. // FIXME: if the addr is going to be removed from the addrbook (hard to
  436. // tell at this point), we need to Delete it from attemptsToDial, not
  437. // record another attempt.
  438. // record attempt
  439. r.attemptsToDial.Store(addr.DialString(), _attemptsToDial{attempts + 1, time.Now()})
  440. }
  441. } else {
  442. // cleanup any history
  443. r.attemptsToDial.Delete(addr.DialString())
  444. }
  445. }
  446. // checkSeeds checks that addresses are well formed.
  447. // Returns number of seeds we can connect to, along with all seeds addrs.
  448. // return err if user provided any badly formatted seed addresses.
  449. // Doesn't error if the seed node can't be reached.
  450. // numOnline returns -1 if no seed nodes were in the initial configuration.
  451. func (r *PEXReactor) checkSeeds() (numOnline int, netAddrs []*p2p.NetAddress, err error) {
  452. lSeeds := len(r.config.Seeds)
  453. if lSeeds == 0 {
  454. return -1, nil, nil
  455. }
  456. netAddrs, errs := p2p.NewNetAddressStrings(r.config.Seeds)
  457. numOnline = lSeeds - len(errs)
  458. for _, err := range errs {
  459. switch e := err.(type) {
  460. case p2p.ErrNetAddressLookup:
  461. r.Logger.Error("Connecting to seed failed", "err", e)
  462. default:
  463. return 0, nil, errors.Wrap(e, "seed node configuration has error")
  464. }
  465. }
  466. return
  467. }
  468. // randomly dial seeds until we connect to one or exhaust them
  469. func (r *PEXReactor) dialSeeds() {
  470. perm := cmn.RandPerm(len(r.seedAddrs))
  471. // perm := r.Switch.rng.Perm(lSeeds)
  472. for _, i := range perm {
  473. // dial a random seed
  474. seedAddr := r.seedAddrs[i]
  475. err := r.Switch.DialPeerWithAddress(seedAddr, false)
  476. if err == nil {
  477. return
  478. }
  479. r.Switch.Logger.Error("Error dialing seed", "err", err, "seed", seedAddr)
  480. }
  481. r.Switch.Logger.Error("Couldn't connect to any seeds")
  482. }
  483. // AttemptsToDial returns the number of attempts to dial specific address. It
  484. // returns 0 if never attempted or successfully connected.
  485. func (r *PEXReactor) AttemptsToDial(addr *p2p.NetAddress) int {
  486. lAttempts, attempted := r.attemptsToDial.Load(addr.DialString())
  487. if attempted {
  488. return lAttempts.(_attemptsToDial).number
  489. }
  490. return 0
  491. }
  492. //----------------------------------------------------------
  493. // Explores the network searching for more peers. (continuous)
  494. // Seed/Crawler Mode causes this node to quickly disconnect
  495. // from peers, except other seed nodes.
  496. func (r *PEXReactor) crawlPeersRoutine() {
  497. // Do an initial crawl
  498. r.crawlPeers()
  499. // Fire periodically
  500. ticker := time.NewTicker(defaultCrawlPeersPeriod)
  501. for {
  502. select {
  503. case <-ticker.C:
  504. r.attemptDisconnects()
  505. r.crawlPeers()
  506. case <-r.Quit():
  507. return
  508. }
  509. }
  510. }
  511. // hasPotentialPeers indicates if there is a potential peer to connect to, by
  512. // consulting the Switch as well as the AddrBook.
  513. func (r *PEXReactor) hasPotentialPeers() bool {
  514. out, in, dial := r.Switch.NumPeers()
  515. return out+in+dial > 0 && len(r.book.ListOfKnownAddresses()) > 0
  516. }
// crawlPeerInfo handles temporary data needed for the
// network crawling performed during seed/crawler mode.
type crawlPeerInfo struct {
	// The listening address of a potential peer we learned about
	Addr *p2p.NetAddress

	// The last time we attempted to reach this address
	LastAttempt time.Time

	// The last time we successfully reached this address
	LastSuccess time.Time
}
  527. // oldestFirst implements sort.Interface for []crawlPeerInfo
  528. // based on the LastAttempt field.
  529. type oldestFirst []crawlPeerInfo
  530. func (of oldestFirst) Len() int { return len(of) }
  531. func (of oldestFirst) Swap(i, j int) { of[i], of[j] = of[j], of[i] }
  532. func (of oldestFirst) Less(i, j int) bool { return of[i].LastAttempt.Before(of[j].LastAttempt) }
  533. // getPeersToCrawl returns addresses of potential peers that we wish to validate.
  534. // NOTE: The status information is ordered as described above.
  535. func (r *PEXReactor) getPeersToCrawl() []crawlPeerInfo {
  536. // TODO: be more selective
  537. addrs := r.book.ListOfKnownAddresses()
  538. of := make(oldestFirst, 0, len(addrs))
  539. for _, addr := range addrs {
  540. if len(addr.ID()) == 0 {
  541. continue // dont use peers without id
  542. }
  543. of = append(of, crawlPeerInfo{
  544. Addr: addr.Addr,
  545. LastAttempt: addr.LastAttempt,
  546. LastSuccess: addr.LastSuccess,
  547. })
  548. }
  549. sort.Sort(of)
  550. return of
  551. }
  552. // crawlPeers will crawl the network looking for new peer addresses. (once)
  553. func (r *PEXReactor) crawlPeers() {
  554. peerInfos := r.getPeersToCrawl()
  555. now := time.Now()
  556. // Use addresses we know of to reach additional peers
  557. for _, pi := range peerInfos {
  558. // Do not attempt to connect with peers we recently dialed
  559. if now.Sub(pi.LastAttempt) < defaultCrawlPeerInterval {
  560. continue
  561. }
  562. // Otherwise, attempt to connect with the known address
  563. err := r.Switch.DialPeerWithAddress(pi.Addr, false)
  564. if err != nil {
  565. r.book.MarkAttempt(pi.Addr)
  566. continue
  567. }
  568. // Ask for more addresses
  569. peer := r.Switch.Peers().Get(pi.Addr.ID)
  570. if peer != nil {
  571. r.RequestAddrs(peer)
  572. }
  573. }
  574. }
  575. // attemptDisconnects checks if we've been with each peer long enough to disconnect
  576. func (r *PEXReactor) attemptDisconnects() {
  577. for _, peer := range r.Switch.Peers().List() {
  578. if peer.Status().Duration < defaultSeedDisconnectWaitPeriod {
  579. continue
  580. }
  581. if peer.IsPersistent() {
  582. continue
  583. }
  584. r.Switch.StopPeerGracefully(peer)
  585. }
  586. }
  587. //-----------------------------------------------------------------------------
  588. // Messages
// PexMessage is a primary type for PEX messages. Underneath, it could contain
// either pexRequestMessage, or pexAddrsMessage messages.
type PexMessage interface{}
// RegisterPexMessage registers the PexMessage interface and its two concrete
// message types, pexRequestMessage and pexAddrsMessage, with the given amino
// codec under the names "tendermint/p2p/PexRequestMessage" and
// "tendermint/p2p/PexAddrsMessage".
func RegisterPexMessage(cdc *amino.Codec) {
	cdc.RegisterInterface((*PexMessage)(nil), nil)
	cdc.RegisterConcrete(&pexRequestMessage{}, "tendermint/p2p/PexRequestMessage", nil)
	cdc.RegisterConcrete(&pexAddrsMessage{}, "tendermint/p2p/PexAddrsMessage", nil)
}
  597. func decodeMsg(bz []byte) (msg PexMessage, err error) {
  598. if len(bz) > maxMsgSize {
  599. return msg, fmt.Errorf("Msg exceeds max size (%d > %d)", len(bz), maxMsgSize)
  600. }
  601. err = cdc.UnmarshalBinaryBare(bz, &msg)
  602. return
  603. }
  604. /*
  605. A pexRequestMessage requests additional peer addresses.
  606. */
  607. type pexRequestMessage struct {
  608. }
  609. func (m *pexRequestMessage) String() string {
  610. return "[pexRequest]"
  611. }
  612. /*
  613. A message with announced peer addresses.
  614. */
  615. type pexAddrsMessage struct {
  616. Addrs []*p2p.NetAddress
  617. }
  618. func (m *pexAddrsMessage) String() string {
  619. return fmt.Sprintf("[pexAddrs %v]", m.Addrs)
  620. }