package p2p

import (
	"fmt"
	"sort"
	"strconv"

	"github.com/gogo/protobuf/proto"
	tmsync "github.com/tendermint/tendermint/internal/libs/sync"
	"github.com/tendermint/tendermint/libs/log"
)

// wrappedEnvelope wraps a p2p Envelope with its precomputed size.
type wrappedEnvelope struct {
	envelope Envelope
	size     uint
}

// assert the WDRR scheduler implements the queue interface at compile-time
var _ queue = (*wdrrScheduler)(nil)

// wdrrScheduler implements a Weighted Deficit Round Robin (WDRR) scheduling
// algorithm via the queue interface. A WDRR queue is created per peer, where
// the queue will have N flows. Each flow corresponds to a p2p Channel, so
// there are N input flows and a single output source, the peer's connection.
//
// The WDRR scheduler contains a shared buffer with a fixed capacity.
//
// Each flow has the following:
// - quantum: The number of bytes that is added to the deficit counter of the
// flow in each round. The flow can send at most quantum bytes at a time. Each
// flow has its own unique quantum, which gives the queue its weighted nature.
// A higher quantum corresponds to a higher weight/priority. The quantum is
// computed as MaxSendBytes * Priority (see the example below).
// - deficit counter: The number of bytes that the flow is allowed to transmit
// when it is its turn.
//
// See: https://en.wikipedia.org/wiki/Deficit_round_robin
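//
// For example (illustrative numbers only, not defaults from any particular
// ChannelDescriptor): a flow whose channel has MaxSendBytes = 1000 and
// Priority = 5 is assigned a quantum of 5000 bytes, so in each round in which
// the flow is non-empty its deficit counter grows by 5000 bytes.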
type wdrrScheduler struct {
	logger       log.Logger
	metrics      *Metrics
	chDescs      []ChannelDescriptor
	capacity     uint
	size         uint
	chPriorities map[ChannelID]uint
	buffer       map[ChannelID][]wrappedEnvelope
	quanta       map[ChannelID]uint
	deficits     map[ChannelID]uint

	closer *tmsync.Closer
	doneCh *tmsync.Closer

	enqueueCh chan Envelope
	dequeueCh chan Envelope
}

func newWDRRScheduler(
	logger log.Logger,
	m *Metrics,
	chDescs []ChannelDescriptor,
	enqueueBuf, dequeueBuf, capacity uint,
) *wdrrScheduler {

	// copy each ChannelDescriptor and sort them by channel priority
	chDescsCopy := make([]ChannelDescriptor, len(chDescs))
	copy(chDescsCopy, chDescs)
	sort.Slice(chDescsCopy, func(i, j int) bool { return chDescsCopy[i].Priority > chDescsCopy[j].Priority })

	var (
		buffer       = make(map[ChannelID][]wrappedEnvelope)
		chPriorities = make(map[ChannelID]uint)
		quanta       = make(map[ChannelID]uint)
		deficits     = make(map[ChannelID]uint)
	)

	for _, chDesc := range chDescsCopy {
		chID := ChannelID(chDesc.ID)
		chPriorities[chID] = uint(chDesc.Priority)
		buffer[chID] = make([]wrappedEnvelope, 0)
		quanta[chID] = chDesc.MaxSendBytes * uint(chDesc.Priority)
	}

	return &wdrrScheduler{
		logger:       logger.With("queue", "wdrr"),
		metrics:      m,
		capacity:     capacity,
		chPriorities: chPriorities,
		chDescs:      chDescsCopy,
		buffer:       buffer,
		quanta:       quanta,
		deficits:     deficits,
		closer:       tmsync.NewCloser(),
		doneCh:       tmsync.NewCloser(),
		enqueueCh:    make(chan Envelope, enqueueBuf),
		dequeueCh:    make(chan Envelope, dequeueBuf),
	}
}

// enqueue returns a write-only channel on which a producer can send Envelopes.
func (s *wdrrScheduler) enqueue() chan<- Envelope {
	return s.enqueueCh
}

// dequeue returns a read-only channel from which a consumer can receive Envelopes.
func (s *wdrrScheduler) dequeue() <-chan Envelope {
	return s.dequeueCh
}
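
// closed returns a channel that is closed once close() has been called,
// signaling that the scheduler is shutting down.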
func (s *wdrrScheduler) closed() <-chan struct{} {
	return s.closer.Done()
}

// close closes the WDRR queue. After this call enqueue() will block, so the
// caller must select on closed() as well to avoid blocking forever. The
// channels returned by enqueue() and dequeue(), along with the internal
// channels, will NOT be closed. Note, close() will block until all externally
// spawned goroutines have exited.
func (s *wdrrScheduler) close() {
	s.closer.Close()
	<-s.doneCh.Done()
}

// start spawns the blocking WDRR queue process in its own goroutine. It must
// be called before the queue can process and accept Envelopes.
func (s *wdrrScheduler) start() {
	go s.process()
}
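
// Illustrative lifecycle (a sketch only; logger, metrics, chDescs, peerID,
// chID, and msg are placeholder values and are not defined in this file):
//
//	s := newWDRRScheduler(logger, metrics, chDescs, 0, 0, 128<<10)
//	s.start()
//
//	// Producers should also select on closed() so they do not block forever
//	// once the scheduler has been closed.
//	select {
//	case s.enqueue() <- Envelope{To: peerID, channelID: chID, Message: msg}:
//	case <-s.closed():
//	}
//
//	s.close()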

// process starts a blocking WDRR scheduler process, where we continuously
// evaluate if we need to attempt to enqueue an Envelope or schedule Envelopes
// to be dequeued and subsequently read and sent on the source connection.
// Internally, each p2p Channel maps to a flow, where each flow has a deficit
// and a quantum.
//
// For each Envelope requested to be enqueued, we evaluate if there is sufficient
// capacity in the shared buffer to add the Envelope. If so, it is added.
// Otherwise, we evaluate all flows of lower priority where we attempt to find an
// existing Envelope in the shared buffer of sufficient size that can be dropped
// in place of the incoming Envelope. If there is no such Envelope that can be
// dropped, then the incoming Envelope is dropped.
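//
// For example (illustrative sizes only): with a 10KB capacity of which 9KB is
// already buffered, an incoming 2KB Envelope does not fit. If a lower priority
// flow holds a buffered Envelope of 2KB or more, that Envelope is dropped and
// the incoming one is enqueued; otherwise the incoming Envelope itself is
// dropped.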
//
// When there is nothing to be enqueued, we perform the WDRR algorithm and
// determine which Envelopes can be dequeued. For each Envelope that can be
// dequeued, it is sent on the dequeueCh. Specifically, for each flow, if it is
// non-empty, its deficit counter is incremented by its quantum value. Then, the
// value of the deficit counter is the maximum number of bytes that can be sent
// in this round. If the deficit counter is greater than or equal to the size of
// the Envelope's message at the head of the queue (HoQ), that Envelope can be
// sent and the value of the counter is decremented by the message's size. Then,
// the size of the next Envelope's message is compared to the counter value, and
// so on. Once the flow is empty or the value of the counter is insufficient, the
// scheduler will skip to the next flow. If the flow is empty, the value of the
// deficit counter is reset to 0.
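//
// For example (illustrative numbers only): a flow with a quantum of 5000 bytes
// holds three queued Envelopes of 3000 bytes each. In the first round its
// deficit grows to 5000, the first Envelope is sent (deficit 2000), and the
// second is too large to send. In the second round the deficit grows to 7000,
// both remaining Envelopes are sent, and since the flow is now empty its
// deficit is reset to 0.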
//
// XXX/TODO: Evaluate the single goroutine scheduler mechanism. In other words,
// evaluate the effectiveness and performance of having a single goroutine
// handle both the enqueueing and dequeueing logic. Specifically, there
// is potentially contention between reading off of enqueueCh and trying to
// enqueue while also attempting to perform the WDRR algorithm and find the next
// set of Envelope(s) to send on the dequeueCh. Alternatively, we could consider
// separate scheduling goroutines, but that would require the use of mutexes and
// could possibly degrade performance.
func (s *wdrrScheduler) process() {
	defer s.doneCh.Close()

	for {
		select {
		case <-s.closer.Done():
			return

		case e := <-s.enqueueCh:
			// attempt to enqueue the incoming Envelope
			chIDStr := strconv.Itoa(int(e.channelID))
			wEnv := wrappedEnvelope{envelope: e, size: uint(proto.Size(e.Message))}
			msgSize := wEnv.size

			s.metrics.PeerPendingSendBytes.With("peer_id", string(e.To)).Add(float64(msgSize))

			// If we're at capacity, we need to either drop the incoming Envelope or
			// an Envelope from a lower priority flow. Otherwise, we add the (wrapped)
			// envelope to the flow's queue.
			if s.size+wEnv.size > s.capacity {
				chPriority := s.chPriorities[e.channelID]

				var (
					canDrop  bool
					dropIdx  int
					dropChID ChannelID
				)

				// Evaluate all lower priority flows and determine if there exists an
				// Envelope that is of equal or greater size that we can drop in favor
				// of the incoming Envelope.
				for i := len(s.chDescs) - 1; i >= 0 && uint(s.chDescs[i].Priority) < chPriority && !canDrop; i-- {
					currChID := ChannelID(s.chDescs[i].ID)
					flow := s.buffer[currChID]

					for j := 0; j < len(flow) && !canDrop; j++ {
						if flow[j].size >= wEnv.size {
							canDrop = true
							dropIdx = j
							dropChID = currChID
							break
						}
					}
				}

				// If we can drop an existing Envelope, drop it and enqueue the incoming
				// Envelope.
				if canDrop {
					chIDStr = strconv.Itoa(int(dropChID))
					chPriority = s.chPriorities[dropChID]
					msgSize = s.buffer[dropChID][dropIdx].size

					// Drop Envelope for the lower priority flow and update the queue's
					// buffer size
					s.size -= msgSize
					s.buffer[dropChID] = append(s.buffer[dropChID][:dropIdx], s.buffer[dropChID][dropIdx+1:]...)

					// add the incoming Envelope and update queue's buffer size
					s.size += wEnv.size
					s.buffer[e.channelID] = append(s.buffer[e.channelID], wEnv)
					s.metrics.PeerQueueMsgSize.With("ch_id", chIDStr).Set(float64(wEnv.size))
				}

				// We either dropped the incoming Envelope or one from an existing
				// lower priority flow.
				s.metrics.PeerQueueDroppedMsgs.With("ch_id", chIDStr).Add(1)
				s.logger.Debug(
					"dropped envelope",
					"ch_id", chIDStr,
					"priority", chPriority,
					"capacity", s.capacity,
					"msg_size", msgSize,
				)
			} else {
				// we have sufficient capacity to enqueue the incoming Envelope
				s.metrics.PeerQueueMsgSize.With("ch_id", chIDStr).Set(float64(wEnv.size))
				s.buffer[e.channelID] = append(s.buffer[e.channelID], wEnv)
				s.size += wEnv.size
			}

		default:
			// perform the WDRR algorithm
			for _, chDesc := range s.chDescs {
				chID := ChannelID(chDesc.ID)

				// only consider non-empty flows
				if len(s.buffer[chID]) > 0 {
					// bump the flow's deficit counter by its quantum
					s.deficits[chID] += s.quanta[chID]

					// grab the flow's current deficit counter and HoQ (wrapped) Envelope
					d := s.deficits[chID]
					we := s.buffer[chID][0]

					// While the flow is non-empty and we can send the current Envelope
					// on the dequeueCh:
					//
					// 1. send the Envelope
					// 2. update the scheduler's shared buffer's size
					// 3. update the flow's deficit
					// 4. remove from the flow's queue
					// 5. grab the next HoQ Envelope and flow's deficit
					for len(s.buffer[chID]) > 0 && d >= we.size {
						s.metrics.PeerSendBytesTotal.With(
							"chID", fmt.Sprint(chID),
							"peer_id", string(we.envelope.To)).Add(float64(we.size))
						s.dequeueCh <- we.envelope
						s.size -= we.size
						s.deficits[chID] -= we.size
						s.buffer[chID] = s.buffer[chID][1:]

						if len(s.buffer[chID]) > 0 {
							d = s.deficits[chID]
							we = s.buffer[chID][0]
						}
					}
				}

				// reset the flow's deficit to zero if it is empty
				if len(s.buffer[chID]) == 0 {
					s.deficits[chID] = 0
				}
			}
		}
	}
}