You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

191 lines
4.6 KiB

  1. package main
  2. import (
  3. "math/rand"
  4. "time"
  5. tmtypes "github.com/tendermint/tendermint/types"
  6. )
  // nodeLivenessTimeout is how long listen waits for any event from a node
  // before it reports that node as down.
  const nodeLivenessTimeout time.Duration = 5 * time.Second
  9. // Monitor keeps track of the nodes and updates common statistics upon
  10. // receiving new events from nodes.
  11. //
  12. // Common statistics is stored in Network struct.
  13. type Monitor struct {
  14. Nodes map[string]*Node
  15. Network *Network
  16. monitorQuit chan struct{} // monitor exitting
  17. nodeQuit map[string]chan struct{} // node is being stopped and removed from under the monitor
  18. recalculateNetworkUptimeEvery time.Duration
  19. numValidatorsUpdateInterval time.Duration
  20. }
  21. // NewMonitor creates new instance of a Monitor. You can provide options to
  22. // change some default values.
  23. //
  24. // Example:
  25. // NewMonitor(monitor.SetNumValidatorsUpdateInterval(1 * time.Second))
  26. func NewMonitor(options ...func(*Monitor)) *Monitor {
  27. m := &Monitor{
  28. Nodes: make(map[string]*Node),
  29. Network: NewNetwork(),
  30. monitorQuit: make(chan struct{}),
  31. nodeQuit: make(map[string]chan struct{}),
  32. recalculateNetworkUptimeEvery: 10 * time.Second,
  33. numValidatorsUpdateInterval: 5 * time.Second,
  34. }
  35. for _, option := range options {
  36. option(m)
  37. }
  38. return m
  39. }
  40. // RecalculateNetworkUptimeEvery lets you change network uptime update interval.
  41. func RecalculateNetworkUptimeEvery(d time.Duration) func(m *Monitor) {
  42. return func(m *Monitor) {
  43. m.recalculateNetworkUptimeEvery = d
  44. }
  45. }
  46. // SetNumValidatorsUpdateInterval lets you change num validators update interval.
  47. func SetNumValidatorsUpdateInterval(d time.Duration) func(m *Monitor) {
  48. return func(m *Monitor) {
  49. m.numValidatorsUpdateInterval = d
  50. }
  51. }
  52. // Monitor begins to monitor the node `n`. The node will be started and added
  53. // to the monitor.
  54. func (m *Monitor) Monitor(n *Node) error {
  55. m.Nodes[n.Name] = n
  56. blockCh := make(chan tmtypes.Header, 10)
  57. n.SendBlocksTo(blockCh)
  58. blockLatencyCh := make(chan float64, 10)
  59. n.SendBlockLatenciesTo(blockLatencyCh)
  60. disconnectCh := make(chan bool, 10)
  61. n.NotifyAboutDisconnects(disconnectCh)
  62. if err := n.Start(); err != nil {
  63. return err
  64. }
  65. m.Network.NewNode(n.Name)
  66. m.nodeQuit[n.Name] = make(chan struct{})
  67. go m.listen(n.Name, blockCh, blockLatencyCh, disconnectCh, m.nodeQuit[n.Name])
  68. return nil
  69. }
  70. // Unmonitor stops monitoring node `n`. The node will be stopped and removed
  71. // from the monitor.
  72. func (m *Monitor) Unmonitor(n *Node) {
  73. m.Network.NodeDeleted(n.Name)
  74. n.Stop()
  75. close(m.nodeQuit[n.Name])
  76. delete(m.nodeQuit, n.Name)
  77. delete(m.Nodes, n.Name)
  78. }
  79. // Start starts the monitor's routines: recalculating network uptime and
  80. // updating number of validators.
  81. func (m *Monitor) Start() error {
  82. go m.recalculateNetworkUptimeLoop()
  83. go m.updateNumValidatorLoop()
  84. return nil
  85. }
  86. // Stop stops the monitor's routines.
  87. func (m *Monitor) Stop() {
  88. close(m.monitorQuit)
  89. for _, n := range m.Nodes {
  90. m.Unmonitor(n)
  91. }
  92. }
  93. // main loop where we listen for events from the node
  94. func (m *Monitor) listen(nodeName string, blockCh <-chan tmtypes.Header, blockLatencyCh <-chan float64, disconnectCh <-chan bool, quit <-chan struct{}) {
  95. for {
  96. select {
  97. case <-quit:
  98. return
  99. case b := <-blockCh:
  100. m.Network.NewBlock(b)
  101. m.Network.NodeIsOnline(nodeName)
  102. case l := <-blockLatencyCh:
  103. m.Network.NewBlockLatency(l)
  104. m.Network.NodeIsOnline(nodeName)
  105. case disconnected := <-disconnectCh:
  106. if disconnected {
  107. m.Network.NodeIsDown(nodeName)
  108. } else {
  109. m.Network.NodeIsOnline(nodeName)
  110. }
  111. case <-time.After(nodeLivenessTimeout):
  112. m.Network.NodeIsDown(nodeName)
  113. }
  114. }
  115. }
  116. // recalculateNetworkUptimeLoop every N seconds.
  117. func (m *Monitor) recalculateNetworkUptimeLoop() {
  118. for {
  119. select {
  120. case <-m.monitorQuit:
  121. return
  122. case <-time.After(m.recalculateNetworkUptimeEvery):
  123. m.Network.RecalculateUptime()
  124. }
  125. }
  126. }
  127. // updateNumValidatorLoop sends a request to a random node once every N seconds,
  128. // which in turn makes an RPC call to get the latest validators.
  129. func (m *Monitor) updateNumValidatorLoop() {
  130. rand.Seed(time.Now().Unix())
  131. var height uint64
  132. var num int
  133. var err error
  134. for {
  135. if 0 == len(m.Nodes) {
  136. time.Sleep(m.numValidatorsUpdateInterval)
  137. continue
  138. }
  139. randomNodeIndex := rand.Intn(len(m.Nodes))
  140. select {
  141. case <-m.monitorQuit:
  142. return
  143. case <-time.After(m.numValidatorsUpdateInterval):
  144. i := 0
  145. for _, n := range m.Nodes {
  146. if i == randomNodeIndex {
  147. height, num, err = n.NumValidators()
  148. if err != nil {
  149. log.Debug(err.Error())
  150. }
  151. break
  152. }
  153. i++
  154. }
  155. if m.Network.Height <= height {
  156. m.Network.NumValidators = num
  157. }
  158. }
  159. }
  160. }