You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

284 lines
6.8 KiB

  1. package monitor
  2. import (
  3. "encoding/json"
  4. "math"
  5. "time"
  6. "github.com/go-kit/kit/log"
  7. "github.com/pkg/errors"
  8. crypto "github.com/tendermint/go-crypto"
  9. events "github.com/tendermint/go-events"
  10. rpc_client "github.com/tendermint/go-rpc/client"
  11. wire "github.com/tendermint/go-wire"
  12. ctypes "github.com/tendermint/tendermint/rpc/core/types"
  13. tmtypes "github.com/tendermint/tendermint/types"
  14. em "github.com/tendermint/tools/tm-monitor/eventmeter"
  15. )
  // rpcClientI is the minimal RPC client surface Node depends on: a single
  // Call that takes a method name, a parameter map and a result holder.
  //
  // TODO: remove once https://github.com/tendermint/go-rpc/issues/8 is fixed.
  type rpcClientI interface {
  	Call(
  		method string,
  		params map[string]interface{},
  		result interface{},
  	) (interface{}, error)
  }
  // maxRestarts is the highest attempt index RestartBackOff reaches before
  // it gives up.
  const (
  	maxRestarts = 25
  )
  21. type Node struct {
  22. rpcAddr string
  23. IsValidator bool `json:"is_validator"` // validator or non-validator?
  24. pubKey crypto.PubKey `json:"pub_key"`
  25. Name string `json:"name"`
  26. Online bool `json:"online"`
  27. Height uint64 `json:"height"`
  28. BlockLatency float64 `json:"block_latency" wire:"unsafe"` // ms, interval between block commits
  29. // em holds the ws connection. Each eventMeter callback is called in a separate go-routine.
  30. em eventMeter
  31. // rpcClient is an client for making RPC calls to TM
  32. rpcClient rpcClientI
  33. blockCh chan<- tmtypes.Header
  34. blockLatencyCh chan<- float64
  35. disconnectCh chan<- bool
  36. checkIsValidatorInterval time.Duration
  37. quit chan struct{}
  38. logger log.Logger
  39. }
  40. func NewNode(rpcAddr string, options ...func(*Node)) *Node {
  41. em := em.NewEventMeter(rpcAddr, UnmarshalEvent)
  42. rpcClient := rpc_client.NewClientURI(rpcAddr) // HTTP client by default
  43. return NewNodeWithEventMeterAndRpcClient(rpcAddr, em, rpcClient, options...)
  44. }
  45. func NewNodeWithEventMeterAndRpcClient(rpcAddr string, em eventMeter, rpcClient rpcClientI, options ...func(*Node)) *Node {
  46. n := &Node{
  47. rpcAddr: rpcAddr,
  48. em: em,
  49. rpcClient: rpcClient,
  50. Name: rpcAddr,
  51. quit: make(chan struct{}),
  52. checkIsValidatorInterval: 5 * time.Second,
  53. logger: log.NewNopLogger(),
  54. }
  55. for _, option := range options {
  56. option(n)
  57. }
  58. return n
  59. }
  60. // SetCheckIsValidatorInterval lets you change interval for checking whenever
  61. // node is still a validator or not.
  62. func SetCheckIsValidatorInterval(d time.Duration) func(n *Node) {
  63. return func(n *Node) {
  64. n.checkIsValidatorInterval = d
  65. }
  66. }
  67. func (n *Node) SendBlocksTo(ch chan<- tmtypes.Header) {
  68. n.blockCh = ch
  69. }
  70. func (n *Node) SendBlockLatenciesTo(ch chan<- float64) {
  71. n.blockLatencyCh = ch
  72. }
  73. func (n *Node) NotifyAboutDisconnects(ch chan<- bool) {
  74. n.disconnectCh = ch
  75. }
  76. // SetLogger lets you set your own logger
  77. func (n *Node) SetLogger(l log.Logger) {
  78. n.logger = l
  79. }
  80. func (n *Node) Start() error {
  81. if err := n.em.Start(); err != nil {
  82. return err
  83. }
  84. n.em.RegisterLatencyCallback(latencyCallback(n))
  85. n.em.Subscribe(tmtypes.EventStringNewBlockHeader(), newBlockCallback(n))
  86. n.em.RegisterDisconnectCallback(disconnectCallback(n))
  87. n.Online = true
  88. n.checkIsValidator()
  89. go n.checkIsValidatorLoop()
  90. return nil
  91. }
  92. func (n *Node) Stop() {
  93. n.Online = false
  94. n.em.RegisterLatencyCallback(nil)
  95. n.em.Unsubscribe(tmtypes.EventStringNewBlockHeader())
  96. n.em.RegisterDisconnectCallback(nil)
  97. // FIXME stop blocks at event_meter.go:140
  98. // n.em.Stop()
  99. close(n.quit)
  100. }
  101. // implements eventmeter.EventCallbackFunc
  102. func newBlockCallback(n *Node) em.EventCallbackFunc {
  103. return func(metric *em.EventMetric, data events.EventData) {
  104. block := data.(tmtypes.EventDataNewBlockHeader).Header
  105. n.Height = uint64(block.Height)
  106. n.logger.Log("event", "new block", "height", block.Height, "numTxs", block.NumTxs)
  107. if n.blockCh != nil {
  108. n.blockCh <- *block
  109. }
  110. }
  111. }
  112. // implements eventmeter.EventLatencyFunc
  113. func latencyCallback(n *Node) em.LatencyCallbackFunc {
  114. return func(latency float64) {
  115. n.BlockLatency = latency / 1000000.0 // ns to ms
  116. n.logger.Log("event", "new block latency", "latency", n.BlockLatency)
  117. if n.blockLatencyCh != nil {
  118. n.blockLatencyCh <- latency
  119. }
  120. }
  121. }
  122. // implements eventmeter.DisconnectCallbackFunc
  123. func disconnectCallback(n *Node) em.DisconnectCallbackFunc {
  124. return func() {
  125. n.Online = false
  126. n.logger.Log("status", "down")
  127. if n.disconnectCh != nil {
  128. n.disconnectCh <- true
  129. }
  130. if err := n.RestartBackOff(); err != nil {
  131. n.logger.Log("err", errors.Wrap(err, "restart failed"))
  132. } else {
  133. n.Online = true
  134. n.logger.Log("status", "online")
  135. if n.disconnectCh != nil {
  136. n.disconnectCh <- false
  137. }
  138. }
  139. }
  140. }
  141. func (n *Node) RestartBackOff() error {
  142. attempt := 0
  143. for {
  144. d := time.Duration(math.Exp2(float64(attempt)))
  145. time.Sleep(d * time.Second)
  146. if err := n.Start(); err != nil {
  147. n.logger.Log("err", errors.Wrap(err, "restart failed"))
  148. } else {
  149. // TODO: authenticate pubkey
  150. return nil
  151. }
  152. attempt++
  153. if attempt > maxRestarts {
  154. return errors.New("Reached max restarts")
  155. }
  156. }
  157. }
  158. func (n *Node) NumValidators() (height uint64, num int, err error) {
  159. height, vals, err := n.validators()
  160. if err != nil {
  161. return 0, 0, err
  162. }
  163. return height, len(vals), nil
  164. }
  165. func (n *Node) validators() (height uint64, validators []*tmtypes.Validator, err error) {
  166. var result ctypes.TMResult
  167. if _, err = n.rpcClient.Call("validators", nil, &result); err != nil {
  168. return 0, make([]*tmtypes.Validator, 0), err
  169. }
  170. vals := result.(*ctypes.ResultValidators)
  171. return uint64(vals.BlockHeight), vals.Validators, nil
  172. }
  173. func (n *Node) checkIsValidatorLoop() {
  174. for {
  175. select {
  176. case <-n.quit:
  177. return
  178. case <-time.After(n.checkIsValidatorInterval):
  179. n.checkIsValidator()
  180. }
  181. }
  182. }
  183. func (n *Node) checkIsValidator() {
  184. _, validators, err := n.validators()
  185. if err == nil {
  186. for _, v := range validators {
  187. key, err := n.getPubKey()
  188. if err == nil && v.PubKey == key {
  189. n.IsValidator = true
  190. }
  191. }
  192. } else {
  193. n.logger.Log("err", errors.Wrap(err, "check is validator failed"))
  194. }
  195. }
  196. func (n *Node) getPubKey() (crypto.PubKey, error) {
  197. if n.pubKey != nil {
  198. return n.pubKey, nil
  199. }
  200. var result ctypes.TMResult
  201. _, err := n.rpcClient.Call("status", nil, &result)
  202. if err != nil {
  203. return nil, err
  204. }
  205. status := result.(*ctypes.ResultStatus)
  206. n.pubKey = status.PubKey
  207. return n.pubKey, nil
  208. }
  209. type eventMeter interface {
  210. Start() error
  211. Stop()
  212. RegisterLatencyCallback(em.LatencyCallbackFunc)
  213. RegisterDisconnectCallback(em.DisconnectCallbackFunc)
  214. Subscribe(string, em.EventCallbackFunc) error
  215. Unsubscribe(string) error
  216. }
  217. // UnmarshalEvent unmarshals a json event
  218. func UnmarshalEvent(b json.RawMessage) (string, events.EventData, error) {
  219. var err error
  220. result := new(ctypes.TMResult)
  221. wire.ReadJSONPtr(result, b, &err)
  222. if err != nil {
  223. return "", nil, err
  224. }
  225. event, ok := (*result).(*ctypes.ResultEvent)
  226. if !ok {
  227. return "", nil, nil // TODO: handle non-event messages (ie. return from subscribe/unsubscribe)
  228. // fmt.Errorf("Result is not type *ctypes.ResultEvent. Got %v", reflect.TypeOf(*result))
  229. }
  230. return event.Name, event.Data, nil
  231. }