You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

231 lines
6.6 KiB

9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
  1. package types
  2. import (
  3. "fmt"
  4. "sync"
  5. "time"
  6. "github.com/rcrowley/go-metrics"
  7. . "github.com/tendermint/go-common"
  8. tmtypes "github.com/tendermint/tendermint/types"
  9. )
  10. // waiting more than this many seconds for a block means we're unhealthy
  11. const newBlockTimeoutSeconds = 5
  12. //------------------------------------------------
  13. // blockchain types
  14. //------------------------------------------------
  15. // Known chain and validator set IDs (from which anything else can be found)
  16. // Returned by the Status RPC
  17. type ChainAndValidatorSetIDs struct {
  18. ChainIDs []string `json:"chain_ids"`
  19. ValidatorSetIDs []string `json:"validator_set_ids"`
  20. }
  21. //------------------------------------------------
  22. // chain state
  23. // Main chain state
  24. // Returned over RPC; also used to manage state
  25. type ChainState struct {
  26. Config *BlockchainConfig `json:"config"`
  27. Status *BlockchainStatus `json:"status"`
  28. }
  29. func (cs *ChainState) NewBlock(block *tmtypes.Block) {
  30. cs.Status.NewBlock(block)
  31. }
  32. func (cs *ChainState) UpdateLatency(oldLatency, newLatency float64) {
  33. cs.Status.UpdateLatency(oldLatency, newLatency)
  34. }
  35. func (cs *ChainState) SetOnline(val *ValidatorState, isOnline bool) {
  36. cs.Status.SetOnline(val, isOnline)
  37. }
  38. //------------------------------------------------
  39. // Blockchain Config: id, validator config
  40. // Chain Config
  41. type BlockchainConfig struct {
  42. // should be fixed for life of chain
  43. ID string `json:"id"`
  44. ValSetID string `json:"val_set_id"` // NOTE: do we really commit to one val set per chain?
  45. // handles live validator states (latency, last block, etc)
  46. // and validator set changes
  47. mtx sync.Mutex
  48. Validators []*ValidatorState `json:"validators"` // TODO: this should be ValidatorConfig and the state in BlockchainStatus
  49. valIDMap map[string]int // map IDs to indices
  50. }
  51. // So we can fetch validator by id rather than index
  52. func (bc *BlockchainConfig) PopulateValIDMap() {
  53. bc.mtx.Lock()
  54. defer bc.mtx.Unlock()
  55. bc.valIDMap = make(map[string]int)
  56. for i, v := range bc.Validators {
  57. bc.valIDMap[v.Config.Validator.ID] = i
  58. }
  59. }
  60. func (bc *BlockchainConfig) GetValidatorByID(valID string) (*ValidatorState, error) {
  61. bc.mtx.Lock()
  62. defer bc.mtx.Unlock()
  63. valIndex, ok := bc.valIDMap[valID]
  64. if !ok {
  65. return nil, fmt.Errorf("Unknown validator %s", valID)
  66. }
  67. return bc.Validators[valIndex], nil
  68. }
  69. //------------------------------------------------
  70. // BlockchainStatus
  71. // Basic blockchain metrics
  72. type BlockchainStatus struct {
  73. mtx sync.Mutex
  74. // Blockchain Info
  75. Height int `json:"height"` // latest height we've got
  76. BlockchainSize int64 `json:"blockchain_size"`
  77. MeanBlockTime float64 `json:"mean_block_time" wire:"unsafe"` // ms (avg over last minute)
  78. TxThroughput float64 `json:"tx_throughput" wire:"unsafe"` // tx/s (avg over last minute)
  79. blockTimeMeter metrics.Meter
  80. txThroughputMeter metrics.Meter
  81. // Network Info
  82. NumValidators int `json:"num_validators"`
  83. ActiveValidators int `json:"active_validators"`
  84. //ActiveNodes int `json:"active_nodes"`
  85. MeanLatency float64 `json:"mean_latency" wire:"unsafe"` // ms
  86. // Health
  87. FullHealth bool `json:"full_health"` // all validators online, synced, making blocks
  88. Healthy bool `json:"healthy"` // we're making blocks
  89. // Uptime
  90. UptimeData *UptimeData `json:"uptime_data"`
  91. // What else can we get / do we want?
  92. // TODO: charts for block time, latency (websockets/event-meter ?)
  93. }
  94. type UptimeData struct {
  95. StartTime time.Time `json:"start_time"`
  96. Uptime float64 `json:"uptime" wire:"unsafe"` // Percentage of time we've been Healthy, ever
  97. totalDownTime time.Duration // total downtime (only updated when we come back online)
  98. wentDown time.Time
  99. // TODO: uptime over last day, month, year
  100. }
  101. func NewBlockchainStatus() *BlockchainStatus {
  102. return &BlockchainStatus{
  103. blockTimeMeter: metrics.NewMeter(),
  104. txThroughputMeter: metrics.NewMeter(),
  105. Healthy: true,
  106. UptimeData: &UptimeData{
  107. StartTime: time.Now(),
  108. Uptime: 100.0,
  109. },
  110. }
  111. }
  112. func (s *BlockchainStatus) NewBlock(block *tmtypes.Block) {
  113. s.mtx.Lock()
  114. defer s.mtx.Unlock()
  115. if block.Header.Height > s.Height {
  116. s.Height = block.Header.Height
  117. s.blockTimeMeter.Mark(1)
  118. s.txThroughputMeter.Mark(int64(block.Header.NumTxs))
  119. s.MeanBlockTime = (1 / s.blockTimeMeter.Rate1()) * 1000 // 1/s to ms
  120. s.TxThroughput = s.txThroughputMeter.Rate1()
  121. // if we're making blocks, we're healthy
  122. if !s.Healthy {
  123. s.Healthy = true
  124. s.UptimeData.totalDownTime += time.Since(s.UptimeData.wentDown)
  125. }
  126. // if we are connected to all validators, we're at full health
  127. // TODO: make sure they're all at the same height (within a block) and all proposing (and possibly validating )
  128. // Alternatively, just check there hasn't been a new round in numValidators rounds
  129. if s.ActiveValidators == s.NumValidators {
  130. s.FullHealth = true
  131. }
  132. // TODO: should we refactor so there's a central loop and ticker?
  133. go s.newBlockTimeout(s.Height)
  134. }
  135. }
  136. // we have newBlockTimeoutSeconds to make a new block, else we're unhealthy
  137. func (s *BlockchainStatus) newBlockTimeout(height int) {
  138. time.Sleep(time.Second * newBlockTimeoutSeconds)
  139. s.mtx.Lock()
  140. defer s.mtx.Unlock()
  141. if !(s.Height > height) {
  142. s.Healthy = false
  143. s.UptimeData.wentDown = time.Now()
  144. }
  145. }
  146. // Used to calculate uptime on demand. TODO: refactor this into the central loop ...
  147. func (s *BlockchainStatus) RealTimeUpdates() {
  148. s.mtx.Lock()
  149. defer s.mtx.Unlock()
  150. since := time.Since(s.UptimeData.StartTime)
  151. uptime := since - s.UptimeData.totalDownTime
  152. if !s.Healthy {
  153. uptime -= time.Since(s.UptimeData.wentDown)
  154. }
  155. s.UptimeData.Uptime = float64(uptime) / float64(since)
  156. }
  157. func (s *BlockchainStatus) UpdateLatency(oldLatency, newLatency float64) {
  158. s.mtx.Lock()
  159. defer s.mtx.Unlock()
  160. // update avg validator rpc latency
  161. mean := s.MeanLatency * float64(s.NumValidators)
  162. mean = (mean - oldLatency + newLatency) / float64(s.NumValidators)
  163. s.MeanLatency = mean
  164. }
  165. // Toggle validators online/offline (updates ActiveValidators and FullHealth)
  166. func (s *BlockchainStatus) SetOnline(val *ValidatorState, isOnline bool) {
  167. val.SetOnline(isOnline)
  168. var change int
  169. if isOnline {
  170. change = 1
  171. } else {
  172. change = -1
  173. }
  174. s.mtx.Lock()
  175. defer s.mtx.Unlock()
  176. s.ActiveValidators += change
  177. if s.ActiveValidators > s.NumValidators {
  178. panic(Fmt("got %d validators. max %ds", s.ActiveValidators, s.NumValidators))
  179. }
  180. // if we lost a connection we're no longer at full health, even if it's still online.
  181. // so long as we receive blocks, we'll know we're still healthy
  182. if s.ActiveValidators != s.NumValidators {
  183. s.FullHealth = false
  184. }
  185. }
  186. func TwoThirdsMaj(count, total int) bool {
  187. return float64(count) > (2.0/3.0)*float64(total)
  188. }