package pex

import (
	"fmt"
	"runtime/debug"
	"sync"
	"time"

	"github.com/tendermint/tendermint/internal/p2p"
	"github.com/tendermint/tendermint/internal/p2p/conn"
	"github.com/tendermint/tendermint/libs/log"
	tmmath "github.com/tendermint/tendermint/libs/math"
	"github.com/tendermint/tendermint/libs/service"
	protop2p "github.com/tendermint/tendermint/proto/tendermint/p2p"
	"github.com/tendermint/tendermint/types"
)

var (
	_ service.Service = (*Reactor)(nil)
	_ p2p.Wrapper     = (*protop2p.PexMessage)(nil)
)

const (
	// PexChannel is a channel for PEX messages
	PexChannel = 0x00

	// over-estimate of max NetAddress size
	// hexID (40) + IP (16) + Port (2) + Name (100) ...
	// NOTE: don't use massive DNS names
	maxAddressSize = 256

	// max addresses returned by GetSelection
	// NOTE: this must match "maxMsgSize"
	maxGetSelection = 250

	// NOTE: amplification factor!
	// a small request results in up to a maxMsgSize response
	maxMsgSize = maxAddressSize * maxGetSelection
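	// (For reference: maxMsgSize = 256 * 250 = 64,000 bytes, so a single tiny
	// request can trigger up to ~62.5 KiB of response data.)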

	// the minimum interval that must elapse between two requests from the
	// same peer
	minReceiveRequestInterval = 100 * time.Millisecond

	// the maximum number of addresses that can be included in a response
	maxAddresses uint16 = 100

	// how long to wait when there are no peers available before trying again
	noAvailablePeersWaitPeriod = 1 * time.Second

	// indicates the ping rate of the pex reactor when the peer store is full.
	// The reactor should still look to add new peers in order to flush out
	// low-scoring peers that are still in the peer store.
	fullCapacityInterval = 10 * time.Minute
)

// TODO: We should decide whether we want channel descriptors to be housed
// within each reactor (as they are now) or, considering that the reactor
// doesn't really need to care about the channel descriptors, whether they
// should be housed in the node module.
func ChannelDescriptor() *conn.ChannelDescriptor {
	return &conn.ChannelDescriptor{
		ID:                  PexChannel,
		MessageType:         new(protop2p.PexMessage),
		Priority:            1,
		SendQueueCapacity:   10,
		RecvMessageCapacity: maxMsgSize, // large enough for one maximal PexResponse
		RecvBufferCapacity:  128,
	}
}

// The peer exchange or PEX reactor supports the peer manager by sending
// requests to other peers for addresses that can be given to the peer manager
// and at the same time advertises addresses to peers that need more.
//
// The reactor is able to tweak the intensity of its search by decreasing or
// increasing the interval between each request. It tracks connected peers in
// a pool of available peers, sending a request to one of them (chosen at
// random) and returning it to the pool once a response is received.
type Reactor struct {
	service.BaseService

	peerManager *p2p.PeerManager
	pexCh       *p2p.Channel
	peerUpdates *p2p.PeerUpdates
	closeCh     chan struct{}

	// list of available peers to loop through and send peer requests to
	availablePeers map[types.NodeID]struct{}

	mtx sync.RWMutex

	// requestsSent keeps track of which peers the PEX reactor has sent requests
	// to. This prevents the sending of spurious responses.
	// NOTE: If a node never responds, it will remain in this map until a
	// peer down status update is sent.
	requestsSent map[types.NodeID]struct{}

	// lastReceivedRequests keeps track of when peers send a request to prevent
	// peers from sending requests too often (as defined by
	// minReceiveRequestInterval).
	lastReceivedRequests map[types.NodeID]time.Time

	// the time when the next request will be sent
	nextRequestTime time.Time

	// keep track of how many new peers we have received relative to the total,
	// in order to extrapolate the size of the network
	newPeers   uint32
	totalPeers uint32

	// discoveryRatio is the inverse ratio of new peers to total peers, squared.
	// This is multiplied by the minimum duration to calculate how long to wait
	// between each request.
	discoveryRatio float32
}

// NewReactor returns a reference to a new reactor.
func NewReactor(
	logger log.Logger,
	peerManager *p2p.PeerManager,
	pexCh *p2p.Channel,
	peerUpdates *p2p.PeerUpdates,
) *Reactor {
	r := &Reactor{
		peerManager:          peerManager,
		pexCh:                pexCh,
		peerUpdates:          peerUpdates,
		closeCh:              make(chan struct{}),
		availablePeers:       make(map[types.NodeID]struct{}),
		requestsSent:         make(map[types.NodeID]struct{}),
		lastReceivedRequests: make(map[types.NodeID]time.Time),
	}

	r.BaseService = *service.NewBaseService(logger, "PEX", r)
	return r
}
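
// A minimal wiring sketch (illustrative only: it assumes a router and peer
// manager already configured as elsewhere in this codebase, and that the
// router's OpenChannel accepts the descriptor returned by ChannelDescriptor
// above; error handling is abbreviated):
//
//	pexCh, err := router.OpenChannel(pex.ChannelDescriptor())
//	if err != nil {
//		return err
//	}
//	reactor := pex.NewReactor(logger, peerManager, pexCh, peerManager.Subscribe())
//	if err := reactor.Start(); err != nil {
//		return err
//	}
//	defer reactor.Stop()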

// OnStart starts separate goroutines for each p2p Channel and listens for
// envelopes on each. In addition, it also listens for peer updates and handles
// messages on that p2p channel accordingly. The caller must be sure to execute
// OnStop to ensure the outbound p2p Channels are closed.
func (r *Reactor) OnStart() error {
	go r.processPexCh()
	go r.processPeerUpdates()

	return nil
}

// OnStop stops the reactor by signaling to all spawned goroutines to exit and
// blocking until they all exit.
func (r *Reactor) OnStop() {
	// Close closeCh to signal to all spawned goroutines to gracefully exit. All
	// p2p Channels should execute Close().
	close(r.closeCh)

	// Wait for all p2p Channels to be closed before returning. This ensures we
	// can easily reason about synchronization of all p2p Channels and ensure no
	// panics will occur.
	<-r.pexCh.Done()
	<-r.peerUpdates.Done()
}

// processPexCh implements a blocking event loop where we listen for p2p
// Envelope messages from the pexCh.
func (r *Reactor) processPexCh() {
	defer r.pexCh.Close()

	for {
		select {
		case <-r.closeCh:
			r.Logger.Debug("stopped listening on PEX channel; closing...")
			return

		// outbound requests for new peers
		case <-r.waitUntilNextRequest():
			r.sendRequestForPeers()

		// inbound requests for new peers or responses to requests sent by this
		// reactor
		case envelope := <-r.pexCh.In:
			if err := r.handleMessage(r.pexCh.ID, envelope); err != nil {
				r.Logger.Error("failed to process message", "ch_id", r.pexCh.ID, "envelope", envelope, "err", err)
				r.pexCh.Error <- p2p.PeerError{
					NodeID: envelope.From,
					Err:    err,
				}
			}
		}
	}
}

// processPeerUpdates initiates a blocking process where we listen for and handle
// PeerUpdate messages. When the reactor is stopped, we will catch the signal and
// close the p2p PeerUpdatesCh gracefully.
func (r *Reactor) processPeerUpdates() {
	defer r.peerUpdates.Close()

	for {
		select {
		case peerUpdate := <-r.peerUpdates.Updates():
			r.processPeerUpdate(peerUpdate)

		case <-r.closeCh:
			r.Logger.Debug("stopped listening on peer updates channel; closing...")
			return
		}
	}
}

// handlePexMessage handles envelopes sent from peers on the PexChannel.
func (r *Reactor) handlePexMessage(envelope p2p.Envelope) error {
	logger := r.Logger.With("peer", envelope.From)

	switch msg := envelope.Message.(type) {
	case *protop2p.PexRequest:
		// check that the peer hasn't sent a prior request too close to this one
		// in time
		if err := r.markPeerRequest(envelope.From); err != nil {
			return err
		}

		// request peers from the peer manager and parse the NodeAddresses into
		// URL strings
		nodeAddresses := r.peerManager.Advertise(envelope.From, maxAddresses)
		pexAddresses := make([]protop2p.PexAddress, len(nodeAddresses))
		for idx, addr := range nodeAddresses {
			pexAddresses[idx] = protop2p.PexAddress{
				URL: addr.String(),
			}
		}

		r.pexCh.Out <- p2p.Envelope{
			To:      envelope.From,
			Message: &protop2p.PexResponse{Addresses: pexAddresses},
		}

	case *protop2p.PexResponse:
		// check that the response matches a request that was made to that peer
		if err := r.markPeerResponse(envelope.From); err != nil {
			return err
		}

		// check the size of the response
		if len(msg.Addresses) > int(maxAddresses) {
			return fmt.Errorf("peer sent too many addresses (max: %d, got: %d)",
				maxAddresses,
				len(msg.Addresses),
			)
		}

		for _, pexAddress := range msg.Addresses {
			peerAddress, err := p2p.ParseNodeAddress(pexAddress.URL)
			if err != nil {
				// silently skip addresses that fail to parse
				continue
			}

			added, err := r.peerManager.Add(peerAddress)
			if err != nil {
				logger.Error("failed to add PEX address", "address", peerAddress, "err", err)
			}
			if added {
				r.newPeers++
				logger.Debug("added PEX address", "address", peerAddress)
			}
			r.totalPeers++
		}

	default:
		return fmt.Errorf("received unknown message: %T", msg)
	}

	return nil
}
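
// An illustrative request/response exchange between peers A and B (the exact
// URL format comes from NodeAddress.String; the shape below is an assumption
// for illustration, not a normative wire format):
//
//	A -> B: PexRequest{}
//	B -> A: PexResponse{Addresses: []PexAddress{
//		{URL: "mconn://<hex-node-id>@203.0.113.1:26656"},
//		...
//	}}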

// handleMessage handles an Envelope sent from a peer on a specific p2p Channel.
// It will handle errors and any possible panics gracefully. A caller can handle
// any error returned by sending a PeerError on the respective channel.
func (r *Reactor) handleMessage(chID p2p.ChannelID, envelope p2p.Envelope) (err error) {
	defer func() {
		if e := recover(); e != nil {
			err = fmt.Errorf("panic in processing message: %v", e)
			r.Logger.Error(
				"recovering from processing message panic",
				"err", err,
				"stack", string(debug.Stack()),
			)
		}
	}()

	r.Logger.Debug("received PEX message", "peer", envelope.From)

	switch chID {
	case p2p.ChannelID(PexChannel):
		err = r.handlePexMessage(envelope)

	default:
		err = fmt.Errorf("unknown channel ID (%d) for envelope (%v)", chID, envelope)
	}

	return err
}

// processPeerUpdate processes a PeerUpdate. For added peers, PeerStatusUp, we
// send a request for addresses.
func (r *Reactor) processPeerUpdate(peerUpdate p2p.PeerUpdate) {
	r.Logger.Debug("received PEX peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)

	r.mtx.Lock()
	defer r.mtx.Unlock()

	switch peerUpdate.Status {
	case p2p.PeerStatusUp:
		r.availablePeers[peerUpdate.NodeID] = struct{}{}

	case p2p.PeerStatusDown:
		delete(r.availablePeers, peerUpdate.NodeID)
		delete(r.requestsSent, peerUpdate.NodeID)
		delete(r.lastReceivedRequests, peerUpdate.NodeID)

	default:
	}
}
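
// waitUntilNextRequest returns a channel that fires once nextRequestTime has
// elapsed, gating the outbound request loop in processPexCh.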
func (r *Reactor) waitUntilNextRequest() <-chan time.Time {
	return time.After(time.Until(r.nextRequestTime))
}

// sendRequestForPeers picks a random peer from the available pool and sends
// it a request for more peer addresses. The function then moves the peer into
// the requestsSent bucket and calculates when the next request should be sent.
func (r *Reactor) sendRequestForPeers() {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	if len(r.availablePeers) == 0 {
		// no peers are available
		r.Logger.Debug("no available peers to send request to, waiting...")
		r.nextRequestTime = time.Now().Add(noAvailablePeersWaitPeriod)
		return
	}

	// use range to get a random peer; map iteration order in Go is randomized
	var peerID types.NodeID
	for peerID = range r.availablePeers {
		break
	}

	// send out the pex request
	r.pexCh.Out <- p2p.Envelope{
		To:      peerID,
		Message: &protop2p.PexRequest{},
	}

	// remove the peer from the available peers list and mark it in the
	// requestsSent map
	delete(r.availablePeers, peerID)
	r.requestsSent[peerID] = struct{}{}

	r.calculateNextRequestTime()
	r.Logger.Debug("peer request sent", "next_request_time", r.nextRequestTime)
}

// calculateNextRequestTime implements something of a proportional controller
// to estimate how often the reactor should be requesting new peer addresses.
// The dependent variable in this calculation is the ratio of new peers to
// all peers that the reactor receives. The interval is thus calculated as the
// inverse of that ratio, squared. In the beginning, all peers should be new
// peers; we expect this ratio to be near 1 and thus the interval to be as
// short as possible. As the node becomes more familiar with the network, the
// ratio of new nodes will plummet to a very small number, meaning the interval
// expands to its upper bound.
//
// CONTRACT: The caller must hold the write lock, as nextRequestTime is updated.
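//
// For illustration (the numbers are a hypothetical, not taken from the code):
// with 20 available peers, baseTime = 100ms * 2 / 20 = 10ms; if the last
// window saw 4 new peers out of 40 total, then
// discoveryRatio = ((40+1)/(4+1))^2 ≈ 67, so the next request fires roughly
// 10ms * 67 ≈ 670ms later.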
func (r *Reactor) calculateNextRequestTime() {
	// check if the peer store is full. If so then there is no need
	// to send peer requests too often
	if ratio := r.peerManager.PeerRatio(); ratio >= 0.95 {
		r.Logger.Debug("peer manager near full ratio, sleeping...",
			"sleep_period", fullCapacityInterval, "ratio", ratio)
		r.nextRequestTime = time.Now().Add(fullCapacityInterval)
		return
	}

	// baseTime represents the shortest interval at which we can send peer
	// requests. For example, if we have 10 peers and can't send a message to
	// the same peer more than once every 500ms, then we can send a request
	// every 50ms. In practice we use a safety margin of 2, ergo 100ms.
	peers := tmmath.MinInt(len(r.availablePeers), 50)
	baseTime := minReceiveRequestInterval
	if peers > 0 {
		baseTime = minReceiveRequestInterval * 2 / time.Duration(peers)
	}

	if r.totalPeers > 0 || r.discoveryRatio == 0 {
		// find the inverse ratio of new peers to total peers. NOTE: we add 1 to
		// both counts to avoid division by zero
		ratio := float32(r.totalPeers+1) / float32(r.newPeers+1)

		// square the ratio in order to get non-linear time intervals
		// NOTE: The longest possible interval for a network with 100 or more peers
		// where a node is connected to 50 of them is 2 minutes.
		r.discoveryRatio = ratio * ratio
		r.newPeers = 0
		r.totalPeers = 0
	}

	// NOTE: As ratio is always >= 1, discoveryRatio is >= 1. Therefore we don't
	// need to worry about the next request time being less than the minimum time.
	r.nextRequestTime = time.Now().Add(baseTime * time.Duration(r.discoveryRatio))
}
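
// markPeerRequest records the time at which a peer sent us a PEX request,
// returning an error if the previous request from that peer arrived less than
// minReceiveRequestInterval ago.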
func (r *Reactor) markPeerRequest(peer types.NodeID) error {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	if lastRequestTime, ok := r.lastReceivedRequests[peer]; ok {
		if time.Now().Before(lastRequestTime.Add(minReceiveRequestInterval)) {
			return fmt.Errorf("peer sent a request too soon after a prior one; minimum interval: %v",
				minReceiveRequestInterval)
		}
	}

	r.lastReceivedRequests[peer] = time.Now()
	return nil
}
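
// markPeerResponse checks that we have an outstanding request to the peer
// and, if so, clears it and returns the peer to the available pool so that it
// can be queried again in the future.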
func (r *Reactor) markPeerResponse(peer types.NodeID) error {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	// check if a request to this peer was sent
	if _, ok := r.requestsSent[peer]; !ok {
		return fmt.Errorf("peer sent a PEX response when none was requested (%v)", peer)
	}
	delete(r.requestsSent, peer)

	// return the peer to the available pool so that it can be used again for
	// future requests
	r.availablePeers[peer] = struct{}{}

	return nil
}