zolfa
/
tendermint


								package p2p


								import (

									"context"

									"errors"

									"fmt"

									"io"

									"math/rand"

									"net"

									"runtime"

									"sync"

									"time"


									"github.com/gogo/protobuf/proto"


									"github.com/tendermint/tendermint/crypto"

									"github.com/tendermint/tendermint/libs/log"

									"github.com/tendermint/tendermint/libs/service"

									"github.com/tendermint/tendermint/types"

								)


								// queueBufferDefault is the size passed to the queue factory when a peer's
								// outbound message queue is created (see getOrMakeQueue).
								const queueBufferDefault = 32


								// ChannelID is an arbitrary channel ID. OpenChannel derives it from the ID of
								// the ChannelDescriptor it is given, and the Router uses it as the key of its
								// channelQueues and channelMessages maps.
								type ChannelID uint16


								// Envelope contains a message with sender/receiver routing info. Reactors
								// exchange Envelopes with the Router through Channel.In and Channel.Out.
								type Envelope struct {
									From      types.NodeID  // sender (empty if outbound)
									To        types.NodeID  // receiver (empty if inbound)
									Broadcast bool          // send to all connected peers (ignores To)
									Message   proto.Message // message payload

									// channelID is for internal Router use, set on outbound messages to inform
									// the sendPeer() goroutine which transport channel to use. routeChannel()
									// overwrites it with the ID of the channel the envelope was sent on.
									//
									// FIXME: If we migrate the Transport API to a byte-oriented multi-stream
									// API, this will no longer be necessary since each channel will be mapped
									// onto a stream during channel/peer setup. See:
									// https://github.com/tendermint/spec/pull/227
									channelID ChannelID
								}


								// PeerError is a peer error reported via Channel.Error. routeChannel() logs
								// each reported error and forwards it to PeerManager.Errored().
								//
								// FIXME: This currently just disconnects the peer, which is too simplistic.
								// For example, some errors should be logged, some should cause disconnects,
								// and some should ban the peer.
								//
								// FIXME: This should probably be replaced by a more general PeerBehavior
								// concept that can mark good and bad behavior and contributes to peer scoring.
								// It should possibly also allow reactors to request explicit actions, e.g.
								// disconnection or banning, in addition to doing this based on aggregates.
								type PeerError struct {
									// NodeID identifies the peer the error is attributed to.
									NodeID types.NodeID
									// Err is the error being reported for that peer.
									Err    error
								}


								// Channel is a bidirectional channel to exchange Protobuf messages with peers,
								// wrapped in Envelope to specify routing info (i.e. sender/receiver).
								type Channel struct {
									ID    ChannelID
									In    <-chan Envelope  // inbound messages (peers to reactors)
									Out   chan<- Envelope  // outbound messages (reactors to peers)
									Error chan<- PeerError // peer error reporting

									messageType proto.Message // the channel's message type, used for unmarshaling
									// closeCh is closed by Close() and exposed read-only through Done().
									closeCh     chan struct{}
									// closeOnce makes Close() safe to call more than once.
									closeOnce   sync.Once
								}


								// NewChannel builds a Channel around the supplied inbound, outbound, and error
								// channels, and allocates the internal close-notification channel. Reactors
								// should normally obtain channels via Router.OpenChannel(); this constructor
								// mainly serves the Router itself and tests.
								func NewChannel(
									id ChannelID,
									messageType proto.Message,
									inCh <-chan Envelope,
									outCh chan<- Envelope,
									errCh chan<- PeerError,
								) *Channel {
									ch := new(Channel)
									ch.ID = id
									ch.messageType = messageType
									ch.In, ch.Out, ch.Error = inCh, outCh, errCh
									ch.closeCh = make(chan struct{})

									return ch
								}


								// Close shuts the channel down exactly once: it signals Done() and closes the
								// Out and Error channels, so any later send on those will panic. In is left
								// open on purpose so that Router senders need no extra synchronization; they
								// should watch Done() to detect closure instead.
								func (c *Channel) Close() {
									shutdown := func() {
										close(c.closeCh)
										close(c.Out)
										close(c.Error)
									}

									c.closeOnce.Do(shutdown)
								}


								// Done exposes a receive-only view of the channel that Close() closes, letting
								// callers select on channel shutdown.
								func (c *Channel) Done() <-chan struct{} {
									var done <-chan struct{} = c.closeCh
									return done
								}


								// Wrapper is a Protobuf message that can contain a variety of inner messages
								// (e.g. via oneof fields). If a Channel's message type implements Wrapper, the
								// Router will automatically wrap outbound messages and unwrap inbound messages,
								// such that reactors do not have to do this themselves.
								//
								// For outbound messages, routeChannel() clones the channel's wrapper value and
								// calls Wrap on the clone; an envelope whose message fails to wrap is logged
								// and dropped.
								type Wrapper interface {
									proto.Message

									// Wrap will take a message and wrap it in this one if possible. It
									// returns an error if the message cannot be wrapped.
									Wrap(proto.Message) error

									// Unwrap will unwrap the inner message contained in this message.
									Unwrap() (proto.Message, error)
								}


								// RouterOptions specifies options for a Router. Validate() must be called
								// before use (NewRouter does so); it fills in the documented defaults.
								type RouterOptions struct {
									// ResolveTimeout is the timeout for resolving NodeAddress URLs.
									// 0 means no timeout.
									ResolveTimeout time.Duration

									// DialTimeout is the timeout for dialing a peer. 0 means no timeout.
									DialTimeout time.Duration

									// HandshakeTimeout is the timeout for handshaking with a peer. 0 means
									// no timeout.
									HandshakeTimeout time.Duration

									// QueueType must be "wdrr" (Weighted Deficit Round Robin), "priority", or
									// "fifo". Defaults to "fifo".
									QueueType string

									// MaxIncomingConnectionAttempts rate limits the number of incoming connection
									// attempts per IP address. Defaults to 100.
									MaxIncomingConnectionAttempts uint

									// IncomingConnectionWindow describes how often an IP address
									// can attempt to create a new connection. Defaults to 100
									// milliseconds, and cannot be less than 1 millisecond.
									IncomingConnectionWindow time.Duration

									// FilterPeerByIP is used by the router to inject filtering
									// behavior for new incoming connections. The router passes
									// the remote IP of the incoming connection and the port number
									// as arguments. Functions should return an error to reject the
									// peer. A nil function disables IP filtering.
									FilterPeerByIP func(context.Context, net.IP, uint16) error

									// FilterPeerByID is used by the router to inject filtering
									// behavior for new incoming connections. The router passes
									// the NodeID of the node before completing the connection,
									// but this occurs after the handshake is complete. Filter by
									// IP address to filter before the handshake. Functions should
									// return an error to reject the peer. A nil function disables
									// ID filtering.
									FilterPeerByID func(context.Context, types.NodeID) error

									// DialSleep controls the amount of time that the router
									// sleeps between dialing peers. If not set, a default value
									// is used that sleeps for a (random) amount of time up to 3
									// seconds between submitting each peer to be dialed.
									DialSleep func(context.Context)

									// NumConcurrentDials controls how many parallel goroutines
									// are used to dial peers. This defaults to the value of
									// runtime.NumCPU.
									NumConcurrentDials func() int
								}


								// Supported values for RouterOptions.QueueType. Validate() rejects any other
								// non-empty value, and createQueueFactory() maps each one to a queue
								// constructor.
								const (
									queueTypeFifo     = "fifo"
									queueTypePriority = "priority"
									queueTypeWDRR     = "wdrr"
								)


								// Validate checks the router options and fills in defaults for unset fields,
								// mutating the receiver:
								//
								//   - an empty QueueType becomes "fifo"; unknown queue types are rejected;
								//   - a zero IncomingConnectionWindow becomes 100ms; any other value below
								//     1ms (including negative values) is rejected;
								//   - a zero MaxIncomingConnectionAttempts becomes 100.
								//
								// It returns a descriptive error for the first invalid option found.
								func (o *RouterOptions) Validate() error {
									switch o.QueueType {
									case "":
										o.QueueType = queueTypeFifo
									case queueTypeFifo, queueTypeWDRR, queueTypePriority:
										// supported queue type, nothing to do
									default:
										return fmt.Errorf("queue type %q is not supported", o.QueueType)
									}

									switch {
									case o.IncomingConnectionWindow == 0:
										o.IncomingConnectionWindow = 100 * time.Millisecond
									case o.IncomingConnectionWindow < time.Millisecond:
										// Exactly 1ms is accepted, so the message says "at least".
										return fmt.Errorf("incoming connection window must be at least 1ms [%s]",
											o.IncomingConnectionWindow)
									}

									if o.MaxIncomingConnectionAttempts == 0 {
										o.MaxIncomingConnectionAttempts = 100
									}

									return nil
								}


								// Router manages peer connections and routes messages between peers and reactor

								// channels. It takes a PeerManager for peer lifecycle management (e.g. which

								// peers to dial and when) and a set of Transports for connecting and

								// communicating with peers.

								//

								// On startup, three main goroutines are spawned to maintain peer connections:

								//

								//   dialPeers(): in a loop, calls PeerManager.DialNext() to get the next peer

								//   address to dial and spawns a goroutine that dials the peer, handshakes

								//   with it, and begins to route messages if successful.

								//

								//   acceptPeers(): in a loop, waits for an inbound connection via

								//   Transport.Accept() and spawns a goroutine that handshakes with it and

								//   begins to route messages if successful.

								//

								//   evictPeers(): in a loop, calls PeerManager.EvictNext() to get the next

								//   peer to evict, and disconnects it by closing its message queue.

								//

								// When a peer is connected, an outbound peer message queue is registered in

								// peerQueues, and routePeer() is called to spawn off two additional goroutines:

								//

								//   sendPeer(): waits for an outbound message from the peerQueues queue,

								//   marshals it, and passes it to the peer transport which delivers it.

								//

								//   receivePeer(): waits for an inbound message from the peer transport,

								//   unmarshals it, and passes it to the appropriate inbound channel queue

								//   in channelQueues.

								//

								// When a reactor opens a channel via OpenChannel, an inbound channel message

								// queue is registered in channelQueues, and a channel goroutine is spawned:

								//

								//   routeChannel(): waits for an outbound message from the channel, looks

								//   up the recipient peer's outbound message queue in peerQueues, and submits

								//   the message to it.

								//

								// All channel sends in the router are blocking. It is the responsibility of the

								// queue interface in peerQueues and channelQueues to prioritize and drop

								// messages as appropriate during contention to prevent stalls and ensure good

								// quality of service.

								type Router struct {
									*service.BaseService

									logger             log.Logger
									metrics            *Metrics
									// options holds the options passed to NewRouter after Validate() has
									// filled in defaults.
									options            RouterOptions
									nodeInfo           types.NodeInfo
									privKey            crypto.PrivKey
									peerManager        *PeerManager
									// chDescs collects the descriptors of all channels opened via
									// OpenChannel; the priority and WDRR queue factories pass it to the
									// schedulers they create.
									chDescs            []ChannelDescriptor
									transports         []Transport
									// connTracker rate limits inbound connections per IP (see acceptPeers).
									connTracker        connectionTracker
									// protocolTransports maps each protocol to the first transport in
									// transports that advertises it; dialPeer uses it to pick a transport.
									protocolTransports map[Protocol]Transport
									stopCh             chan struct{} // signals Router shutdown

									// peerMtx guards peerQueues and peerChannels.
									peerMtx    sync.RWMutex
									peerQueues map[types.NodeID]queue // outbound messages per peer for all channels
									// the channels that the peer queue has open
									peerChannels map[types.NodeID]channelIDs
									// queueFactory builds a queue of the given size; it is selected from
									// options.QueueType by createQueueFactory.
									queueFactory func(int) queue

									// FIXME: We don't strictly need to use a mutex for this if we seal the
									// channels on router start. This depends on whether we want to allow
									// dynamic channels in the future.
									channelMtx      sync.RWMutex
									channelQueues   map[ChannelID]queue // inbound messages from all peers to a single channel
									// channelMessages records each open channel's message type.
									channelMessages map[ChannelID]proto.Message
								}


								// NewRouter validates the options and assembles a Router from its
								// collaborators. The supplied Transports must already be listening on the
								// appropriate interfaces; the Router closes them when it stops. An error is
								// returned if the options are invalid or no queue factory can be built for
								// the configured queue type.
								func NewRouter(
									logger log.Logger,
									metrics *Metrics,
									nodeInfo types.NodeInfo,
									privKey crypto.PrivKey,
									peerManager *PeerManager,
									transports []Transport,
									options RouterOptions,
								) (*Router, error) {
									if err := options.Validate(); err != nil {
										return nil, err
									}

									tracker := newConnTracker(
										options.MaxIncomingConnectionAttempts,
										options.IncomingConnectionWindow,
									)

									r := &Router{
										logger:             logger,
										metrics:            metrics,
										options:            options,
										nodeInfo:           nodeInfo,
										privKey:            privKey,
										peerManager:        peerManager,
										chDescs:            make([]ChannelDescriptor, 0),
										transports:         transports,
										connTracker:        tracker,
										protocolTransports: map[Protocol]Transport{},
										stopCh:             make(chan struct{}),
										peerQueues:         map[types.NodeID]queue{},
										peerChannels:       make(map[types.NodeID]channelIDs),
										channelQueues:      map[ChannelID]queue{},
										channelMessages:    map[ChannelID]proto.Message{},
									}
									r.BaseService = service.NewBaseService(logger, "router", r)

									factory, err := r.createQueueFactory()
									if err != nil {
										return nil, err
									}
									r.queueFactory = factory

									// Index transports by protocol; the first transport advertising a
									// protocol keeps it.
									for _, t := range transports {
										for _, p := range t.Protocols() {
											if _, taken := r.protocolTransports[p]; taken {
												continue
											}
											r.protocolTransports[p] = t
										}
									}

									return r, nil
								}


								// createQueueFactory returns the queue constructor matching
								// r.options.QueueType. FIFO queues are built directly by newFIFOQueue; the
								// priority and WDRR variants round the requested size up to an even number,
								// hand half of it to the scheduler for each of its two size arguments, and
								// start the scheduler before returning it. An unknown queue type yields an
								// error.
								func (r *Router) createQueueFactory() (func(int) queue, error) {
									// halve rounds size up to the next even number and returns half of it.
									halve := func(size int) uint {
										if size%2 != 0 {
											size++
										}
										return uint(size) / 2
									}

									switch kind := r.options.QueueType; kind {
									case queueTypeFifo:
										return newFIFOQueue, nil

									case queueTypePriority:
										build := func(size int) queue {
											half := halve(size)
											sched := newPQScheduler(r.logger, r.metrics, r.chDescs, half, half, defaultCapacity)
											sched.start()
											return sched
										}
										return build, nil

									case queueTypeWDRR:
										build := func(size int) queue {
											half := halve(size)
											sched := newWDRRScheduler(r.logger, r.metrics, r.chDescs, half, half, defaultCapacity)
											sched.start()
											return sched
										}
										return build, nil

									default:
										return nil, fmt.Errorf("cannot construct queue of type %q", kind)
									}
								}


								// OpenChannel registers a new channel for chDesc and returns the reactor-facing
								// Channel. messageType is the Protobuf type carried on the channel (used for
								// unmarshaling); if it implements Wrapper, outbound messages are wrapped
								// automatically. size sets the buffer size of the inbound queue and of the
								// outbound and error channels. The caller must close the channel when done,
								// before stopping the Router. An error is returned if a channel with the same
								// ID is already open.
								func (r *Router) OpenChannel(chDesc ChannelDescriptor, messageType proto.Message, size int) (*Channel, error) {
									r.channelMtx.Lock()
									defer r.channelMtx.Unlock()

									chID := ChannelID(chDesc.ID)
									if _, exists := r.channelQueues[chID]; exists {
										return nil, fmt.Errorf("channel %v already exists", chID)
									}
									r.chDescs = append(r.chDescs, chDesc)

									inbound := r.queueFactory(size)
									outbound := make(chan Envelope, size)
									peerErrs := make(chan PeerError, size)
									channel := NewChannel(chID, messageType, inbound.dequeue(), outbound, peerErrs)

									// wrapper stays nil when the message type does not implement Wrapper.
									wrapper, _ := messageType.(Wrapper)

									r.channelQueues[chID] = inbound
									r.channelMessages[chID] = messageType

									// add the channel to the nodeInfo if it's not already there.
									r.nodeInfo.AddChannel(uint16(chDesc.ID))

									// unregister removes the channel from the Router's maps and then closes
									// its inbound queue, once routing for the channel has ended.
									unregister := func() {
										r.channelMtx.Lock()
										delete(r.channelQueues, chID)
										delete(r.channelMessages, chID)
										r.channelMtx.Unlock()
										inbound.close()
									}

									go func() {
										defer unregister()
										r.routeChannel(chID, outbound, peerErrs, wrapper)
									}()

									return channel, nil
								}


								// routeChannel receives outbound channel messages and routes them to the
								// appropriate peer. It also receives peer errors and reports them to the peer
								// manager. It returns when either the outbound channel or error channel is
								// closed, or the Router is stopped. wrapper is an optional message wrapper
								// for messages, see Wrapper for details.
								//
								// Messages are dropped (not retried) when wrapping fails, when the target peer
								// has no outbound queue, when the peer does not have the channel open, or when
								// the peer's queue is closed while the send is pending.
								func (r *Router) routeChannel(
									chID ChannelID,
									outCh <-chan Envelope,
									errCh <-chan PeerError,
									wrapper Wrapper,
								) {
									for {
										select {
										case envelope, ok := <-outCh:
											if !ok {
												return
											}

											// Mark the envelope with the channel ID to allow sendPeer() to pass
											// it on to Transport.SendMessage().
											envelope.channelID = chID

											// wrap the message in a wrapper message, if requested
											if wrapper != nil {
												// Wrap a clone so the channel's shared wrapper value is never
												// mutated.
												msg := proto.Clone(wrapper)
												if err := msg.(Wrapper).Wrap(envelope.Message); err != nil {
													r.logger.Error("failed to wrap message", "channel", chID, "err", err)
													continue
												}

												envelope.Message = msg
											}

											// collect peer queues to pass the message via
											var queues []queue
											if envelope.Broadcast {
												r.peerMtx.RLock()

												queues = make([]queue, 0, len(r.peerQueues))
												for nodeID, q := range r.peerQueues {
													peerChs := r.peerChannels[nodeID]

													// check whether the peer is receiving on that channel
													if _, ok := peerChs[chID]; ok {
														queues = append(queues, q)
													}
												}

												r.peerMtx.RUnlock()
											} else {
												r.peerMtx.RLock()

												q, ok := r.peerQueues[envelope.To]
												contains := false
												if ok {
													peerChs := r.peerChannels[envelope.To]

													// check whether the peer is receiving on that channel
													_, contains = peerChs[chID]
												}
												r.peerMtx.RUnlock()

												if !ok {
													r.logger.Debug("dropping message for unconnected peer", "peer", envelope.To, "channel", chID)
													continue
												}

												if !contains {
													// reactor tried to send a message across a channel that the
													// peer doesn't have available. This is a known issue due to
													// how peer subscriptions work:
													// https://github.com/tendermint/tendermint/issues/6598
													continue
												}

												queues = []queue{q}
											}

											// send message to peers
											for _, q := range queues {
												// Keep the monotonic clock reading (time.Time.UTC strips it) so
												// that the measured duration is immune to wall-clock changes.
												start := time.Now()

												select {
												case q.enqueue() <- envelope:
													r.metrics.RouterPeerQueueSend.Observe(time.Since(start).Seconds())

												case <-q.closed():
													r.logger.Debug("dropping message for unconnected peer", "peer", envelope.To, "channel", chID)

												case <-r.stopCh:
													return
												}
											}

										case peerError, ok := <-errCh:
											if !ok {
												return
											}

											r.logger.Error("peer error, evicting", "peer", peerError.NodeID, "err", peerError.Err)

											r.peerManager.Errored(peerError.NodeID, peerError.Err)

										case <-r.stopCh:
											return
										}
									}
								}


								// numConccurentDials reports how many dial workers to run: the result of the
								// configured NumConcurrentDials option when one is set, otherwise
								// runtime.NumCPU().
								func (r *Router) numConccurentDials() int {
									if limit := r.options.NumConcurrentDials; limit != nil {
										return limit()
									}

									return runtime.NumCPU()
								}


								// filterPeersIP runs the optional FilterPeerByIP hook for an inbound
								// connection's remote IP and port. It returns the hook's error, or nil when no
								// hook is configured.
								func (r *Router) filterPeersIP(ctx context.Context, ip net.IP, port uint16) error {
									if filter := r.options.FilterPeerByIP; filter != nil {
										return filter(ctx, ip, port)
									}

									return nil
								}


								// filterPeersID runs the optional FilterPeerByID hook for a peer's node ID. It
								// returns the hook's error, or nil when no hook is configured.
								func (r *Router) filterPeersID(ctx context.Context, id types.NodeID) error {
									if filter := r.options.FilterPeerByID; filter != nil {
										return filter(ctx, id)
									}

									return nil
								}


								// dialSleep pauses between dial submissions. A configured DialSleep option is
								// called as-is; otherwise the call blocks for a random duration below
								// dialRandomizerIntervalMilliseconds milliseconds, or until ctx is done,
								// whichever comes first.
								func (r *Router) dialSleep(ctx context.Context) {
									if custom := r.options.DialSleep; custom != nil {
										custom(ctx)
										return
									}

									// nolint:gosec // G404: Use of weak random number generator
									jitter := time.Duration(rand.Int63n(dialRandomizerIntervalMilliseconds)) * time.Millisecond

									timer := time.NewTimer(jitter)
									defer timer.Stop()

									select {
									case <-timer.C:
									case <-ctx.Done():
									}
								}


								// acceptPeers accepts inbound connections from peers on the given transport,
								// and spawns goroutines that route messages to/from them.
								//
								// The loop ends when the transport reports io.EOF, or when Accept fails with
								// any other error (which is logged). A connection rejected by the per-IP
								// connection tracker is closed and skipped; it does not stop the accept loop.
								func (r *Router) acceptPeers(transport Transport) {
									r.logger.Debug("starting accept routine", "transport", transport)
									ctx := r.stopCtx()
									for {
										conn, err := transport.Accept()
										switch {
										case err == nil:
										case errors.Is(err, io.EOF):
											r.logger.Debug("stopping accept routine", "transport", transport)
											return
										default:
											r.logger.Error("failed to accept connection", "transport", transport, "err", err)
											return
										}

										incomingIP := conn.RemoteEndpoint().IP
										if err := r.connTracker.AddConn(incomingIP); err != nil {
											closeErr := conn.Close()
											r.logger.Debug("rate limiting incoming peer",
												"err", err,
												"ip", incomingIP.String(),
												"close_err", closeErr,
											)

											// Only this connection is rejected. Returning here would stop
											// accepting connections from every peer on this transport as
											// soon as a single IP exceeded its rate limit.
											continue
										}

										// Spawn a goroutine for the handshake, to avoid head-of-line blocking.
										go r.openConnection(ctx, conn)
									}
								}


								// openConnection drives a freshly accepted inbound connection through IP
								// filtering, the handshake, node ID filtering, and peer manager acceptance,
								// and then routes messages for the peer until it disconnects. The connection
								// is closed and removed from the connection tracker when the method returns.
								func (r *Router) openConnection(ctx context.Context, conn Connection) {
									defer conn.Close()
									defer r.connTracker.RemoveConn(conn.RemoteEndpoint().IP)

									remote := conn.RemoteEndpoint()
									if err := r.filterPeersIP(ctx, remote.IP, remote.Port); err != nil {
										r.logger.Debug("peer filtered by IP", "ip", remote.IP.String(), "err", err)
										return
									}

									// FIXME: The peer manager may reject the peer during Accepted()
									// after we've handshaked with the peer (to find out which peer it
									// is). However, because the handshake has no ack, the remote peer
									// will think the handshake was successful and start sending us
									// messages.
									//
									// This can cause problems in tests, where a disconnection can cause
									// the local node to immediately redial, while the remote node may
									// not have completed the disconnection yet and therefore reject the
									// reconnection attempt (since it thinks we're still connected from
									// before).
									//
									// The Router should do the handshake and have a final ack/fail
									// message to make sure both ends have accepted the connection, such
									// that it can be coordinated with the peer manager.
									peerInfo, _, err := r.handshakePeer(ctx, conn, "")
									if err != nil {
										// Cancellation is not logged as a handshake failure.
										if !errors.Is(err, context.Canceled) {
											r.logger.Error("peer handshake failed", "endpoint", conn, "err", err)
										}
										return
									}

									peerID := peerInfo.NodeID
									if err := r.filterPeersID(ctx, peerID); err != nil {
										r.logger.Debug("peer filtered by node ID", "node", peerID, "err", err)
										return
									}

									accepted := func() error { return r.peerManager.Accepted(peerID) }
									if err := r.runWithPeerMutex(accepted); err != nil {
										r.logger.Error("failed to accept connection",
											"op", "incoming/accepted", "peer", peerID, "err", err)
										return
									}

									r.routePeer(peerID, conn, toChannelIDs(peerInfo.Channels))
								}


								// dialPeers maintains outbound connections to peers by dialing them. It asks
								// the peer manager for the next address to dial, hands it to a fixed pool of
								// worker goroutines, and sleeps (see dialSleep) between submissions. It returns
								// once the dial loop has ended and every worker has exited.
								func (r *Router) dialPeers() {
									r.logger.Debug("starting dial routine")
									ctx := r.stopCtx()

									addresses := make(chan NodeAddress)
									wg := &sync.WaitGroup{}

									// Start a limited number of goroutines to dial peers in
									// parallel. the goal is to avoid starting an unbounded number
									// of goroutines thereby spamming the network, but also being
									// able to add peers at a reasonable pace, though the number
									// is somewhat arbitrary. The action is further throttled by a
									// sleep after sending to the addresses channel.
									//
									// The worker count is read once so that the pool size cannot change
									// while the workers are being started.
									numWorkers := r.numConccurentDials()
									for i := 0; i < numWorkers; i++ {
										wg.Add(1)
										go func() {
											defer wg.Done()

											for {
												select {
												case <-ctx.Done():
													return
												case address, ok := <-addresses:
													if !ok {
														// The channel is closed once the dial loop ends. Without
														// this check a worker would receive zero-value addresses
														// and attempt to dial them.
														return
													}
													r.connectPeer(ctx, address)
												}
											}
										}()
									}

								LOOP:
									for {
										address, err := r.peerManager.DialNext(ctx)
										switch {
										case errors.Is(err, context.Canceled):
											r.logger.Debug("stopping dial routine")
											break LOOP
										case err != nil:
											r.logger.Error("failed to find next peer to dial", "err", err)
											break LOOP
										}

										select {
										case addresses <- address:
											// this jitters the frequency that we call
											// DialNext and prevents us from attempting to
											// create connections too quickly.

											r.dialSleep(ctx)
											continue
										case <-ctx.Done():
											break LOOP
										}
									}

									// This goroutine is the only sender, so the channel is closed exactly once
									// here, on every exit path, releasing idle workers even when the loop
									// ended for a reason other than Router shutdown.
									close(addresses)

									wg.Wait()
								}


								// connectPeer dials the given address, handshakes with the peer, registers
								// the connection with the peer manager, and on success starts routing for the
								// peer in a new goroutine. Dial and handshake failures other than context
								// cancellation are logged and reported to the peer manager via DialFailed.
								// The connection is closed on every failure path after a successful dial.
								func (r *Router) connectPeer(ctx context.Context, address NodeAddress) {
									// reportDialFailed tells the peer manager the dial attempt failed and
									// logs if that report itself fails.
									reportDialFailed := func() {
										if err := r.peerManager.DialFailed(address); err != nil {
											r.logger.Error("failed to report dial failure", "peer", address, "err", err)
										}
									}

									conn, err := r.dialPeer(ctx, address)
									if err != nil {
										if errors.Is(err, context.Canceled) {
											return
										}
										r.logger.Error("failed to dial peer", "peer", address, "err", err)
										reportDialFailed()
										return
									}

									peerInfo, _, err := r.handshakePeer(ctx, conn, address.NodeID)
									if err != nil {
										if !errors.Is(err, context.Canceled) {
											r.logger.Error("failed to handshake with peer", "peer", address, "err", err)
											reportDialFailed()
										}
										conn.Close()
										return
									}

									dialed := func() error { return r.peerManager.Dialed(address) }
									if err := r.runWithPeerMutex(dialed); err != nil {
										r.logger.Error("failed to dial peer",
											"op", "outgoing/dialing", "peer", address.NodeID, "err", err)
										conn.Close()
										return
									}

									// routePeer (also) calls connection close
									go r.routePeer(address.NodeID, conn, toChannelIDs(peerInfo.Channels))
								}


								// getOrMakeQueue returns the outbound queue registered for peerID. If none
								// exists, it creates one of size queueBufferDefault and records it together
								// with the peer's channels. An existing registration is returned unchanged,
								// and its recorded channels are not updated. The lookup and registration
								// happen under peerMtx.
								func (r *Router) getOrMakeQueue(peerID types.NodeID, channels channelIDs) queue {
									r.peerMtx.Lock()
									defer r.peerMtx.Unlock()

									q, exists := r.peerQueues[peerID]
									if !exists {
										q = r.queueFactory(queueBufferDefault)
										r.peerQueues[peerID] = q
										r.peerChannels[peerID] = channels
									}

									return q
								}


								// dialPeer connects to a peer by dialing it. The address is first resolved to
								// endpoints (bounded by ResolveTimeout when set); each endpoint is then dialed
								// in order using the transport registered for its protocol (each attempt
								// bounded by DialTimeout when set). The first successful connection is
								// returned. An error is returned if resolution fails, yields no endpoints, or
								// no endpoint could be dialed.
								func (r *Router) dialPeer(ctx context.Context, address NodeAddress) (Connection, error) {
									resolveCtx := ctx
									if r.options.ResolveTimeout > 0 {
										var cancel context.CancelFunc
										resolveCtx, cancel = context.WithTimeout(resolveCtx, r.options.ResolveTimeout)
										defer cancel()
									}

									r.logger.Debug("resolving peer address", "peer", address)
									endpoints, err := address.Resolve(resolveCtx)
									switch {
									case err != nil:
										return nil, fmt.Errorf("failed to resolve address %q: %w", address, err)
									case len(endpoints) == 0:
										return nil, fmt.Errorf("address %q did not resolve to any endpoints", address)
									}

									for _, endpoint := range endpoints {
										transport, ok := r.protocolTransports[endpoint.Protocol]
										if !ok {
											r.logger.Error("no transport found for protocol", "endpoint", endpoint)
											continue
										}

										// The per-attempt timeout is released as soon as the dial returns.
										// Deferring the cancel inside the loop would keep every attempt's
										// context and timer alive until dialPeer itself returns.
										dialCtx, cancel := ctx, context.CancelFunc(func() {})
										if r.options.DialTimeout > 0 {
											dialCtx, cancel = context.WithTimeout(ctx, r.options.DialTimeout)
										}

										// FIXME: When we dial and handshake the peer, we should pass it
										// appropriate address(es) it can use to dial us back. It can't use our
										// remote endpoint, since TCP uses different port numbers for outbound
										// connections than it does for inbound. Also, we may need to vary this
										// by the peer's endpoint, since e.g. a peer on 192.168.0.0 can reach us
										// on a private address on this endpoint, but a peer on the public
										// Internet can't and needs a different public address.
										conn, err := transport.Dial(dialCtx, endpoint)
										cancel()
										if err != nil {
											r.logger.Error("failed to dial endpoint", "peer", address.NodeID, "endpoint", endpoint, "err", err)
											continue
										}

										r.logger.Debug("dialed peer", "peer", address.NodeID, "endpoint", endpoint)
										return conn, nil
									}

									return nil, errors.New("all endpoints failed")
								}


								// handshakePeer performs the connection handshake (bounded by HandshakeTimeout
								// when set) and validates what the peer reported: its NodeInfo must be valid,
								// its node ID must match the one derived from its public key, it must match
								// expectID when that is non-empty, and it must be compatible with our own
								// NodeInfo. The peer's info and key are returned alongside any error.
								func (r *Router) handshakePeer(
									ctx context.Context,
									conn Connection,
									expectID types.NodeID,
								) (types.NodeInfo, crypto.PubKey, error) {
									if timeout := r.options.HandshakeTimeout; timeout > 0 {
										var cancel context.CancelFunc
										ctx, cancel = context.WithTimeout(ctx, timeout)
										defer cancel()
									}

									peerInfo, peerKey, err := conn.Handshake(ctx, r.nodeInfo, r.privKey)
									if err != nil {
										return peerInfo, peerKey, err
									}

									if err = peerInfo.Validate(); err != nil {
										return peerInfo, peerKey, fmt.Errorf("invalid handshake NodeInfo: %w", err)
									}

									keyID := types.NodeIDFromPubKey(peerKey)
									switch {
									case keyID != peerInfo.NodeID:
										return peerInfo, peerKey, fmt.Errorf("peer's public key did not match its node ID %q (expected %q)",
											peerInfo.NodeID, keyID)
									case expectID != "" && expectID != peerInfo.NodeID:
										return peerInfo, peerKey, fmt.Errorf("expected to connect with peer %q, got %q",
											expectID, peerInfo.NodeID)
									}

									if err := r.nodeInfo.CompatibleWith(peerInfo); err != nil {
										rejected := ErrRejected{
											err:            err,
											id:             peerInfo.ID(),
											isIncompatible: true,
										}
										return peerInfo, peerKey, rejected
									}

									return peerInfo, peerKey, nil
								}


								// runWithPeerMutex calls fn while holding peerMtx locked for writing and
								// returns fn's result. The unlock is deferred, so the mutex is released even
								// if fn panics.
								func (r *Router) runWithPeerMutex(fn func() error) error {
									r.peerMtx.Lock()
									defer r.peerMtx.Unlock()

									err := fn()
									return err
								}


								// routePeer routes inbound and outbound messages between a peer and the reactor
								// channels. It marks the peer as ready, starts one goroutine receiving from the
								// connection and one sending from the peer's queue, and blocks until both have
								// returned.
								//
								// As soon as either goroutine returns, the connection and the send queue are
								// closed; if they are closed elsewhere, that likewise causes this method to
								// shut down and return. On return the peer's queue and channel entries are
								// removed, the peer manager is told the peer disconnected, and the peer gauge
								// is decremented.
								func (r *Router) routePeer(peerID types.NodeID, conn Connection, channels channelIDs) {
									r.metrics.Peers.Add(1)
									r.peerManager.Ready(peerID)

									sendQueue := r.getOrMakeQueue(peerID, channels)
									defer func() {
										r.peerMtx.Lock()
										delete(r.peerQueues, peerID)
										delete(r.peerChannels, peerID)
										r.peerMtx.Unlock()

										sendQueue.close()

										r.peerManager.Disconnected(peerID)
										r.metrics.Peers.Add(-1)
									}()

									r.logger.Info("peer connected", "peer", peerID, "endpoint", conn)

									// Capacity 2: each goroutine can deliver its result without blocking,
									// regardless of when it is read.
									errCh := make(chan error, 2)

									go func() {
										errCh <- r.receivePeer(peerID, conn)
									}()

									go func() {
										errCh <- r.sendPeer(peerID, conn, sendQueue)
									}()

									// Wait for the first goroutine to finish, then tear down the connection
									// and queue so the other one stops as well. The close error is ignored on
									// purpose: the error that ended the session is the one reported below.
									err := <-errCh
									_ = conn.Close()
									sendQueue.close()

									if e := <-errCh; err == nil {
										// The first err was nil, so we update it with the second err, which may
										// or may not be nil.
										err = e
									}

									// errors.Is also recognizes an io.EOF that was wrapped on its way up, so
									// an ordinary disconnect is not reported as a failure.
									switch {
									case err == nil, errors.Is(err, io.EOF):
										r.logger.Info("peer disconnected", "peer", peerID, "endpoint", conn)

									default:
										r.logger.Error("peer failure", "peer", peerID, "endpoint", conn, "err", err)
									}
								}


								// receivePeer receives inbound messages from a peer, deserializes them and
								// passes them on to the queue of the channel they arrived on.
								//
								// Messages for unknown channels and messages that fail to decode or unwrap are
								// logged and dropped; the loop then continues with the next message. A message
								// whose channel queue has been closed is dropped as well.
								//
								// It returns the error reported by conn.ReceiveMessage, or nil if the router
								// is stopped while a message is waiting to be enqueued.
								func (r *Router) receivePeer(peerID types.NodeID, conn Connection) error {
									for {
										chID, bz, err := conn.ReceiveMessage()
										if err != nil {
											return err
										}

										r.channelMtx.RLock()
										chQueue, ok := r.channelQueues[chID]
										messageType := r.channelMessages[chID]
										r.channelMtx.RUnlock()

										if !ok {
											r.logger.Debug("dropping message for unknown channel", "peer", peerID, "channel", chID)
											continue
										}

										// Decode into a fresh copy of the channel's registered message type.
										msg := proto.Clone(messageType)
										if err := proto.Unmarshal(bz, msg); err != nil {
											r.logger.Error("message decoding failed, dropping message", "peer", peerID, "err", err)
											continue
										}

										if wrapper, ok := msg.(Wrapper); ok {
											msg, err = wrapper.Unwrap()
											if err != nil {
												r.logger.Error("failed to unwrap message", "peer", peerID, "err", err)
												continue
											}
										}

										// Keep the monotonic clock reading (UTC() would strip it) so the
										// elapsed time below is unaffected by wall-clock adjustments.
										start := time.Now()

										select {
										case chQueue.enqueue() <- Envelope{From: peerID, Message: msg}:
											r.metrics.PeerReceiveBytesTotal.With(
												"chID", fmt.Sprint(chID),
												"peer_id", string(peerID)).Add(float64(proto.Size(msg)))
											r.metrics.RouterChannelQueueSend.Observe(time.Since(start).Seconds())
											r.logger.Debug("received message", "peer", peerID, "message", msg)

										case <-chQueue.closed():
											r.logger.Debug("channel closed, dropping message", "peer", peerID, "channel", chID)

										case <-r.stopCh:
											return nil
										}
									}
								}


								// sendPeer sends queued messages to a peer. It repeatedly dequeues an envelope
								// from peerQueue, marshals its message and writes it to conn on the envelope's
								// channel.
								//
								// Envelopes with a nil message, or whose message cannot be marshaled, are
								// logged and skipped. It returns the error from conn.SendMessage, or nil once
								// the queue is closed or the router is stopped.
								func (r *Router) sendPeer(peerID types.NodeID, conn Connection, peerQueue queue) error {
									for {
										// Keep the monotonic clock reading (UTC() would strip it) so the
										// queue wait time below is unaffected by wall-clock adjustments.
										start := time.Now()

										select {
										case envelope := <-peerQueue.dequeue():
											r.metrics.RouterPeerQueueRecv.Observe(time.Since(start).Seconds())
											if envelope.Message == nil {
												r.logger.Error("dropping nil message", "peer", peerID)
												continue
											}

											bz, err := proto.Marshal(envelope.Message)
											if err != nil {
												r.logger.Error("failed to marshal message", "peer", peerID, "err", err)
												continue
											}

											if _, err = conn.SendMessage(envelope.channelID, bz); err != nil {
												return err
											}

											// Log the peer this goroutine serves: envelope.To is ignored for
											// broadcast envelopes and so may not identify the recipient.
											r.logger.Debug("sent message", "peer", peerID, "message", envelope.Message)

										case <-peerQueue.closed():
											return nil

										case <-r.stopCh:
											return nil
										}
									}
								}


								// evictPeers closes the send queues of peers that the peer manager selects for
								// eviction. It loops until the router's stop context is canceled or the peer
								// manager returns any other error.
								func (r *Router) evictPeers() {
									r.logger.Debug("starting evict routine")
									ctx := r.stopCtx()

									for {
										peerID, err := r.peerManager.EvictNext(ctx)
										if err != nil {
											if errors.Is(err, context.Canceled) {
												r.logger.Debug("stopping evict routine")
											} else {
												r.logger.Error("failed to find next peer to evict", "err", err)
											}
											return
										}

										r.logger.Info("evicting peer", "peer", peerID)

										// Only hold the lock for the lookup; the queue is closed outside it.
										r.peerMtx.RLock()
										peerQueue, found := r.peerQueues[peerID]
										r.peerMtx.RUnlock()

										if !found {
											continue
										}
										peerQueue.close()
									}
								}


								// NodeInfo returns a copy of the router's current NodeInfo, as produced by its
								// Copy method. Used for testing.
								func (r *Router) NodeInfo() types.NodeInfo {
									info := r.nodeInfo.Copy()
									return info
								}


								// OnStart implements service.Service. It logs the router's node information
								// and launches the background goroutines: the dialer, the evictor, and one
								// accept loop per configured transport. It always returns nil.
								func (r *Router) OnStart() error {
									// The address is only used in the log entry below, so a lookup error is
									// deliberately ignored.
									netAddr, _ := r.nodeInfo.NetAddress()

									// NOTE(review): this logs through r.Logger while the other methods use
									// r.logger - confirm both refer to the same logger.
									r.Logger.Info("starting router",
										"node_id", r.nodeInfo.NodeID,
										"channels", r.nodeInfo.Channels,
										"listen_addr", r.nodeInfo.ListenAddr,
										"net_addr", netAddr)

									go r.dialPeers()
									go r.evictPeers()

									for _, t := range r.transports {
										go r.acceptPeers(t)
									}

									return nil
								}


								// OnStop implements service.Service. It signals shutdown by closing stopCh,
								// closes every transport, and then blocks until all channel queues and peer
								// queues known at that point have been closed.
								//
								// All channels must be closed by OpenChannel() callers before stopping the
								// router, to prevent blocked channel sends in reactors. Channels are not closed
								// here, since that would cause any reactor senders to panic, so it is the
								// sender's responsibility.
								func (r *Router) OnStop() {
									// Signal router shutdown.
									close(r.stopCh)

									// Close transport listeners (unblocks Accept calls).
									for _, t := range r.transports {
										err := t.Close()
										if err == nil {
											continue
										}
										r.logger.Error("failed to close transport", "transport", t, "err", err)
									}

									// Snapshot every remaining queue under its own lock, then wait for the
									// queues to close without holding any lock.
									var pending []queue

									r.channelMtx.RLock()
									for _, chQueue := range r.channelQueues {
										pending = append(pending, chQueue)
									}
									r.channelMtx.RUnlock()

									r.peerMtx.RLock()
									for _, peerQueue := range r.peerQueues {
										pending = append(pending, peerQueue)
									}
									r.peerMtx.RUnlock()

									for i := range pending {
										<-pending[i].closed()
									}
								}


								// stopCtx returns a new context, derived from context.Background, that is
								// canceled when the router stops. Each call starts a goroutine that waits on
								// stopCh and then cancels the returned context.
								func (r *Router) stopCtx() context.Context {
									ctx, cancel := context.WithCancel(context.Background())

									go func() {
										defer cancel()
										<-r.stopCh
									}()

									return ctx
								}


								// channelIDs is a set of channel IDs, represented as a map whose keys are the
								// members and whose values are empty structs.
								type channelIDs map[ChannelID]struct{}


								// toChannelIDs builds a channelIDs set from a byte slice, converting each byte
								// to a ChannelID. Duplicate bytes collapse into a single entry, and a nil or
								// empty slice yields an empty, non-nil set.
								func toChannelIDs(bytes []byte) channelIDs {
									ids := make(channelIDs, len(bytes))
									for i := range bytes {
										ids[ChannelID(bytes[i])] = struct{}{}
									}
									return ids
								}