package pex

import (
	"context"
	"fmt"
	"runtime/debug"
	"sync"
	"time"

	"github.com/tendermint/tendermint/internal/p2p"
	"github.com/tendermint/tendermint/internal/p2p/conn"
	"github.com/tendermint/tendermint/libs/log"
	tmmath "github.com/tendermint/tendermint/libs/math"
	"github.com/tendermint/tendermint/libs/service"
	protop2p "github.com/tendermint/tendermint/proto/tendermint/p2p"
	"github.com/tendermint/tendermint/types"
)

var (
	_ service.Service = (*Reactor)(nil)
	_ p2p.Wrapper     = (*protop2p.PexMessage)(nil)
)

const (
	// PexChannel is the channel ID for PEX messages.
	PexChannel = 0x00

	// over-estimate of the max NetAddress size:
	// hexID (40) + IP (16) + Port (2) + Name (100) ...
	// NOTE: don't use massive DNS names.
	maxAddressSize = 256

	// max addresses returned by GetSelection
	// NOTE: this must match "maxMsgSize"
	maxGetSelection = 250

	// NOTE: amplification factor!
	// A small request can result in a response of up to maxMsgSize bytes.
	maxMsgSize = maxAddressSize * maxGetSelection
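	// (maxMsgSize works out to 256 * 250 = 64000 bytes.)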

	// the minimum interval we allow between two consecutive requests from the
	// same peer
	minReceiveRequestInterval = 100 * time.Millisecond

	// the maximum number of addresses that can be included in a response
	maxAddresses uint16 = 100

	// how long to wait when no peers are available before trying again
	noAvailablePeersWaitPeriod = 1 * time.Second

	// the request interval of the PEX reactor when the peer store is full.
	// The reactor should still look to add new peers in order to flush out
	// low-scoring peers that are still in the peer store.
	fullCapacityInterval = 10 * time.Minute
)

// TODO: We should decide whether we want channel descriptors to be housed
// within each reactor (as they are now) or, considering that the reactor doesn't
// really need to care about the channel descriptors, if they should be housed
// in the node module.
func ChannelDescriptor() *conn.ChannelDescriptor {
	return &conn.ChannelDescriptor{
		ID:                  PexChannel,
		MessageType:         new(protop2p.PexMessage),
		Priority:            1,
		SendQueueCapacity:   10,
		RecvMessageCapacity: maxMsgSize,
		RecvBufferCapacity:  128,
	}
}

// The peer exchange or PEX reactor supports the peer manager by sending
// requests to other peers for addresses that can be given to the peer manager
// and at the same time advertises addresses to peers that need more.
//
// The reactor is able to tweak the intensity of its search by decreasing or
// increasing the interval between each request. It tracks connected peers in
// a set of available peers, picking one to send a request to and returning it
// to the set once a response is received.
type Reactor struct {
	service.BaseService
	logger log.Logger

	peerManager *p2p.PeerManager
	pexCh       *p2p.Channel
	peerUpdates *p2p.PeerUpdates

	// list of available peers to loop through and send peer requests to
	availablePeers map[types.NodeID]struct{}

	mtx sync.RWMutex

	// requestsSent keeps track of which peers the PEX reactor has sent requests
	// to. This prevents the sending of spurious responses.
	// NOTE: If a node never responds, it will remain in this map until a
	// peer-down status update is received.
	requestsSent map[types.NodeID]struct{}

	// lastReceivedRequests keeps track of when peers send a request to prevent
	// peers from sending requests too often (as defined by
	// minReceiveRequestInterval).
	lastReceivedRequests map[types.NodeID]time.Time

	// keep track of how many of the received peers are new, relative to all
	// peers received, in order to extrapolate the size of the network
	newPeers   uint32
	totalPeers uint32

	// discoveryRatio is the squared inverse ratio of new peers to total peers
	// received. It is multiplied by the minimum duration to calculate how long
	// to wait between each request.
	discoveryRatio float32
}

// NewReactor returns a reference to a new reactor.
func NewReactor(
	ctx context.Context,
	logger log.Logger,
	peerManager *p2p.PeerManager,
	channelCreator p2p.ChannelCreator,
	peerUpdates *p2p.PeerUpdates,
) (*Reactor, error) {
	channel, err := channelCreator(ctx, ChannelDescriptor())
	if err != nil {
		return nil, err
	}

	r := &Reactor{
		logger:               logger,
		peerManager:          peerManager,
		pexCh:                channel,
		peerUpdates:          peerUpdates,
		availablePeers:       make(map[types.NodeID]struct{}),
		requestsSent:         make(map[types.NodeID]struct{}),
		lastReceivedRequests: make(map[types.NodeID]time.Time),
	}

	r.BaseService = *service.NewBaseService(logger, "PEX", r)
	return r, nil
}

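// A minimal wiring sketch for callers. Note that router.OpenChannel and
// peerManager.Subscribe below stand in for however the surrounding node code
// creates channels and peer-update subscriptions; they are assumptions, not
// part of this package:
//
//	peerUpdates := peerManager.Subscribe(ctx)
//	reactor, err := pex.NewReactor(ctx, logger, peerManager, router.OpenChannel, peerUpdates)
//	if err != nil {
//		return err
//	}
//	if err := reactor.Start(ctx); err != nil {
//		return err
//	}
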
// OnStart starts separate go routines for each p2p Channel and listens for
// envelopes on each. In addition, it also listens for peer updates and handles
// messages on that p2p channel accordingly.
func (r *Reactor) OnStart(ctx context.Context) error {
	go r.processPexCh(ctx)
	go r.processPeerUpdates(ctx)
	return nil
}

// OnStop stops the reactor. The goroutines spawned in OnStart exit when the
// context passed to OnStart is canceled, so there is nothing further to do
// here.
func (r *Reactor) OnStop() {}

// processPexCh implements a blocking event loop where we listen for p2p
// Envelope messages from the pexCh. It also runs a timer that triggers
// outbound requests for more peer addresses; the wait before the next request
// is the duration returned by the message and request handlers.
func (r *Reactor) processPexCh(ctx context.Context) {
	timer := time.NewTimer(0)
	defer timer.Stop()

	r.mtx.Lock()
	var (
		duration = r.calculateNextRequestTime()
		err      error
	)
	r.mtx.Unlock()

	incoming := make(chan *p2p.Envelope)
	go func() {
		defer close(incoming)
		iter := r.pexCh.Receive(ctx)
		for iter.Next(ctx) {
			select {
			case <-ctx.Done():
				return
			case incoming <- iter.Envelope():
			}
		}
	}()

	for {
		timer.Reset(duration)

		select {
		case <-ctx.Done():
			return

		// outbound requests for new peers
		case <-timer.C:
			duration, err = r.sendRequestForPeers(ctx)
			if err != nil {
				return
			}

		// inbound requests for new peers or responses to requests sent by this
		// reactor
		case envelope, ok := <-incoming:
			if !ok {
				return
			}

			duration, err = r.handleMessage(ctx, r.pexCh.ID, envelope)
			if err != nil {
				r.logger.Error("failed to process message", "ch_id", r.pexCh.ID, "envelope", envelope, "err", err)
				if serr := r.pexCh.SendError(ctx, p2p.PeerError{
					NodeID: envelope.From,
					Err:    err,
				}); serr != nil {
					return
				}
			}
		}
	}
}

// processPeerUpdates initiates a blocking process where we listen for and handle
// PeerUpdate messages. The loop exits when the given context is canceled.
func (r *Reactor) processPeerUpdates(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case peerUpdate := <-r.peerUpdates.Updates():
			r.processPeerUpdate(peerUpdate)
		}
	}
}

// handlePexMessage handles envelopes sent from peers on the PexChannel. The
// returned duration is used by the caller as the wait before the next
// outbound peer request is sent.
func (r *Reactor) handlePexMessage(ctx context.Context, envelope *p2p.Envelope) (time.Duration, error) {
	logger := r.logger.With("peer", envelope.From)

	switch msg := envelope.Message.(type) {
	case *protop2p.PexRequest:
		// check that the peer hasn't sent a prior request too close to this one
		// in time
		if err := r.markPeerRequest(envelope.From); err != nil {
			return time.Minute, err
		}

		// request peers from the peer manager and parse the NodeAddresses into
		// URL strings
		nodeAddresses := r.peerManager.Advertise(envelope.From, maxAddresses)
		pexAddresses := make([]protop2p.PexAddress, len(nodeAddresses))
		for idx, addr := range nodeAddresses {
			pexAddresses[idx] = protop2p.PexAddress{
				URL: addr.String(),
			}
		}

		if err := r.pexCh.Send(ctx, p2p.Envelope{
			To:      envelope.From,
			Message: &protop2p.PexResponse{Addresses: pexAddresses},
		}); err != nil {
			return 0, err
		}

		return time.Second, nil

	case *protop2p.PexResponse:
		// check that the response matches a request that was made to that peer
		if err := r.markPeerResponse(envelope.From); err != nil {
			return time.Minute, err
		}

		// check the size of the response
		if len(msg.Addresses) > int(maxAddresses) {
			return 10 * time.Minute, fmt.Errorf("peer sent too many addresses (max: %d, got: %d)",
				maxAddresses,
				len(msg.Addresses),
			)
		}

		for _, pexAddress := range msg.Addresses {
			peerAddress, err := p2p.ParseNodeAddress(pexAddress.URL)
			if err != nil {
				continue
			}
			added, err := r.peerManager.Add(peerAddress)
			if err != nil {
				logger.Error("failed to add PEX address", "address", peerAddress, "err", err)
			}
			if added {
				r.newPeers++
				logger.Debug("added PEX address", "address", peerAddress)
			}
			r.totalPeers++
		}

		return 10 * time.Minute, nil

	default:
		return time.Second, fmt.Errorf("received unknown message: %T", msg)
	}
}

// handleMessage handles an Envelope sent from a peer on a specific p2p Channel.
// It will handle errors and any possible panics gracefully. A caller can handle
// any error returned by sending a PeerError on the respective channel.
func (r *Reactor) handleMessage(ctx context.Context, chID p2p.ChannelID, envelope *p2p.Envelope) (duration time.Duration, err error) {
	defer func() {
		if e := recover(); e != nil {
			err = fmt.Errorf("panic in processing message: %v", e)
			r.logger.Error(
				"recovering from processing message panic",
				"err", err,
				"stack", string(debug.Stack()),
			)
		}
	}()

	r.logger.Debug("received PEX message", "peer", envelope.From)

	switch chID {
	case p2p.ChannelID(PexChannel):
		duration, err = r.handlePexMessage(ctx, envelope)
	default:
		err = fmt.Errorf("unknown channel ID (%d) for envelope (%v)", chID, envelope)
	}

	return
}

// processPeerUpdate processes a PeerUpdate. For PeerStatusUp we add the peer
// to the set of available peers that can be sent address requests; for
// PeerStatusDown we remove the peer and clear any request bookkeeping for it.
func (r *Reactor) processPeerUpdate(peerUpdate p2p.PeerUpdate) {
	r.logger.Debug("received PEX peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)

	r.mtx.Lock()
	defer r.mtx.Unlock()

	switch peerUpdate.Status {
	case p2p.PeerStatusUp:
		r.availablePeers[peerUpdate.NodeID] = struct{}{}
	case p2p.PeerStatusDown:
		delete(r.availablePeers, peerUpdate.NodeID)
		delete(r.requestsSent, peerUpdate.NodeID)
		delete(r.lastReceivedRequests, peerUpdate.NodeID)
	default:
	}
}

// sendRequestForPeers chooses a peer from the set of available peers and
// sends it a request for more peer addresses. The chosen peer is then moved
// into the requestsSent bucket, and the time until the next request is
// calculated and returned.
func (r *Reactor) sendRequestForPeers(ctx context.Context) (time.Duration, error) {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	if len(r.availablePeers) == 0 {
		// no peers are available
		r.logger.Debug("no available peers to send request to, waiting...")
		return noAvailablePeersWaitPeriod, nil
	}

	// map iteration order is randomized, so this picks an arbitrary peer
	var peerID types.NodeID
	for peerID = range r.availablePeers {
		break
	}

	// send out the pex request
	if err := r.pexCh.Send(ctx, p2p.Envelope{
		To:      peerID,
		Message: &protop2p.PexRequest{},
	}); err != nil {
		return 0, err
	}

	// remove the peer from the available peers list and mark it in the
	// requestsSent map
	delete(r.availablePeers, peerID)
	r.requestsSent[peerID] = struct{}{}

	dur := r.calculateNextRequestTime()
	r.logger.Debug("peer request sent", "next_request_time", dur)
	return dur, nil
}

// calculateNextRequestTime implements something of a proportional controller
// to estimate how often the reactor should be requesting new peer addresses.
// The dependent variable in this calculation is the ratio of new peers to
// all peers that the reactor receives. The interval is thus calculated as the
// inverse squared. In the beginning, all peers should be new peers.
// We expect this ratio to be near 1 and thus the interval to be as short
// as possible. As the node becomes more familiar with the network, the ratio of
// new nodes will plummet to a very small number, meaning the interval expands
// to its upper bound.
//
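// For example (a worked sketch of the arithmetic below): with 10 available
// peers, baseTime = 2 * 100ms / 10 = 20ms. If roughly half of the addresses
// received in the last window were new, then ratio ~ 2 and discoveryRatio ~ 4,
// so the next request fires after about 20ms * 4 = 80ms.
//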
// CONTRACT: The caller must hold r.mtx exclusively when calling this method.
func (r *Reactor) calculateNextRequestTime() time.Duration {
	// check if the peer store is full. If so then there is no need
	// to send peer requests too often
	if ratio := r.peerManager.PeerRatio(); ratio >= 0.95 {
		r.logger.Debug("peer manager near full ratio, sleeping...",
			"sleep_period", fullCapacityInterval, "ratio", ratio)
		return fullCapacityInterval
	}

	// baseTime represents the shortest interval that we can send peer requests
	// in. For example, if we have 10 peers and we can't send a message to the
	// same peer every 500ms, then we can send a request every 50ms. In practice
	// we use a safety margin of 2, ergo 100ms
	peers := tmmath.MinInt(len(r.availablePeers), 50)
	baseTime := minReceiveRequestInterval
	if peers > 0 {
		baseTime = minReceiveRequestInterval * 2 / time.Duration(peers)
	}

	if r.totalPeers > 0 || r.discoveryRatio == 0 {
		// find the ratio of new peers. NOTE: We add 1 to both sides to avoid
		// divide-by-zero problems
		ratio := float32(r.totalPeers+1) / float32(r.newPeers+1)
		// square the ratio in order to get non-linear time intervals
		// NOTE: The longest possible interval for a network with 100 or more peers
		// where a node is connected to 50 of them is 2 minutes.
		r.discoveryRatio = ratio * ratio
		r.newPeers = 0
		r.totalPeers = 0
	}

	// NOTE: As ratio is always >= 1, discoveryRatio is >= 1. Therefore we don't
	// need to worry about the next request time being less than the minimum time.
	return baseTime * time.Duration(r.discoveryRatio)
}

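// markPeerRequest records that a request was received from the given peer,
// returning an error if the previous request from that peer arrived less than
// minReceiveRequestInterval ago.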
func (r *Reactor) markPeerRequest(peer types.NodeID) error {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	if lastRequestTime, ok := r.lastReceivedRequests[peer]; ok {
		if time.Now().Before(lastRequestTime.Add(minReceiveRequestInterval)) {
			return fmt.Errorf("peer sent a request too close after a prior one. Minimum interval: %v",
				minReceiveRequestInterval)
		}
	}
	r.lastReceivedRequests[peer] = time.Now()
	return nil
}

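// markPeerResponse verifies that a response from the given peer matches an
// outstanding request and, if so, returns the peer to the set of available
// peers.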
func (r *Reactor) markPeerResponse(peer types.NodeID) error {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	// check if a request to this peer was sent
	if _, ok := r.requestsSent[peer]; !ok {
		return fmt.Errorf("peer sent a PEX response when none was requested (%v)", peer)
	}
	delete(r.requestsSent, peer)
	// add the peer back to the set of available peers so that it can be used
	// again for future requests
	r.availablePeers[peer] = struct{}{}
	return nil
}