|
|
- package types
-
- import (
- "fmt"
- "os"
- "os/exec"
- "sync"
- "time"
-
- "github.com/rcrowley/go-metrics"
- . "github.com/tendermint/go-common"
- tmtypes "github.com/tendermint/tendermint/types"
- )
-
// newBlockTimeoutSeconds is how many seconds we wait for a new block;
// waiting longer than this means we're unhealthy.
const newBlockTimeoutSeconds = 5
-
- //------------------------------------------------
- // blockchain types
- // NOTE: mintnet duplicates some types from here and val.go
- //------------------------------------------------
-
// ChainAndValidatorSetIDs lists the known chain IDs and validator set IDs
// (from which anything else can be found).
// It is returned by the Status RPC.
type ChainAndValidatorSetIDs struct {
	ChainIDs        []string `json:"chain_ids"`
	ValidatorSetIDs []string `json:"validator_set_ids"`
}
-
- //------------------------------------------------
- // chain state
-
- // Main chain state
- // Returned over RPC; also used to manage state
- type ChainState struct {
- Config *BlockchainConfig `json:"config"`
- Status *BlockchainStatus `json:"status"`
- }
-
- func (cs *ChainState) NewBlock(block *tmtypes.Header) {
- cs.Status.NewBlock(block)
- }
-
- func (cs *ChainState) UpdateLatency(oldLatency, newLatency float64) {
- cs.Status.UpdateLatency(oldLatency, newLatency)
- }
-
- func (cs *ChainState) SetOnline(val *ValidatorState, isOnline bool) {
- cs.Status.SetOnline(val, isOnline)
- }
-
- //------------------------------------------------
- // Blockchain Config: id, validator config
-
- // Chain Config
- type BlockchainConfig struct {
- // should be fixed for life of chain
- ID string `json:"id"`
- ValSetID string `json:"val_set_id"` // NOTE: do we really commit to one val set per chain?
-
- // handles live validator states (latency, last block, etc)
- // and validator set changes
- mtx sync.Mutex
- Validators []*ValidatorState `json:"validators"` // TODO: this should be ValidatorConfig and the state in BlockchainStatus
- valIDMap map[string]int // map IDs to indices
- }
-
- // So we can fetch validator by id rather than index
- func (bc *BlockchainConfig) PopulateValIDMap() {
- bc.mtx.Lock()
- defer bc.mtx.Unlock()
- bc.valIDMap = make(map[string]int)
- for i, v := range bc.Validators {
- bc.valIDMap[v.Config.Validator.ID] = i
- }
- }
-
- func (bc *BlockchainConfig) GetValidatorByID(valID string) (*ValidatorState, error) {
- bc.mtx.Lock()
- defer bc.mtx.Unlock()
- valIndex, ok := bc.valIDMap[valID]
- if !ok {
- return nil, fmt.Errorf("Unknown validator %s", valID)
- }
- return bc.Validators[valIndex], nil
- }
-
- //------------------------------------------------
- // BlockchainStatus
-
- // Basic blockchain metrics
- type BlockchainStatus struct {
- mtx sync.Mutex
-
- // Blockchain Info
- Height int `json:"height"` // latest height we've got
- BlockchainSize int64 `json:"blockchain_size"`
- MeanBlockTime float64 `json:"mean_block_time" wire:"unsafe"` // ms (avg over last minute)
- TxThroughput float64 `json:"tx_throughput" wire:"unsafe"` // tx/s (avg over last minute)
-
- blockTimeMeter metrics.Meter
- txThroughputMeter metrics.Meter
-
- // Network Info
- NumValidators int `json:"num_validators"`
- ActiveValidators int `json:"active_validators"`
- //ActiveNodes int `json:"active_nodes"`
- MeanLatency float64 `json:"mean_latency" wire:"unsafe"` // ms
-
- // Health
- FullHealth bool `json:"full_health"` // all validators online, synced, making blocks
- Healthy bool `json:"healthy"` // we're making blocks
-
- // Uptime
- UptimeData *UptimeData `json:"uptime_data"`
-
- // What else can we get / do we want?
- // TODO: charts for block time, latency (websockets/event-meter ?)
-
- // for benchmark runs
- benchResults *BenchmarkResults
- }
-
- func (bc *BlockchainStatus) BenchmarkTxs(results chan *BenchmarkResults, nTxs int, args []string) {
- log.Notice("Running benchmark", "ntxs", nTxs)
- bc.benchResults = &BenchmarkResults{
- StartTime: time.Now(),
- nTxs: nTxs,
- results: results,
- }
-
- if len(args) > 0 {
- // TODO: capture output to file
- cmd := exec.Command(args[0], args[1:]...)
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- go cmd.Run()
- }
- }
-
- func (bc *BlockchainStatus) BenchmarkBlocks(results chan *BenchmarkResults, nBlocks int, args []string) {
- log.Notice("Running benchmark", "nblocks", nBlocks)
- bc.benchResults = &BenchmarkResults{
- StartTime: time.Now(),
- nBlocks: nBlocks,
- results: results,
- }
-
- if len(args) > 0 {
- // TODO: capture output to file
- cmd := exec.Command(args[0], args[1:]...)
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- go cmd.Run()
- }
- }
-
// Block is the per-block record kept during a benchmark run: when the block
// was seen, its height and how many transactions it carried.
type Block struct {
	Time   time.Time `json:"time"`
	Height int       `json:"height"`
	NumTxs int       `json:"num_txs"`
}

// BenchmarkResults accumulates the blocks seen during a benchmark run and
// the figures derived from them.
type BenchmarkResults struct {
	StartTime      time.Time `json:"start_time"`
	StartBlock     int       `json:"start_block"`
	TotalTime      float64   `json:"total_time"` // seconds
	Blocks         []*Block  `json:"blocks"`
	NumBlocks      int       `json:"num_blocks"`
	NumTxs         int       `json:"num_txs"`
	MeanLatency    float64   `json:"latency"`    // seconds per block
	MeanThroughput float64   `json:"throughput"` // txs per second

	// either we wait for n blocks or n txs
	nBlocks int
	nTxs    int

	done    bool                   // set once the run has finished
	results chan *BenchmarkResults // finished results are sent here
}
-
- // Return the total time to commit all txs, in seconds
- func (br *BenchmarkResults) ElapsedTime() float64 {
- return float64(br.Blocks[br.NumBlocks-1].Time.Sub(br.StartTime)) / float64(1000000000)
- }
-
- // Return the avg seconds/block
- func (br *BenchmarkResults) Latency() float64 {
- return br.ElapsedTime() / float64(br.NumBlocks)
- }
-
- // Return the avg txs/second
- func (br *BenchmarkResults) Throughput() float64 {
- return float64(br.NumTxs) / br.ElapsedTime()
- }
-
- func (br *BenchmarkResults) Done() {
- log.Info("Done benchmark", "num blocks", br.NumBlocks, "block len", len(br.Blocks))
- br.done = true
- br.TotalTime = br.ElapsedTime()
- br.MeanThroughput = br.Throughput()
- br.MeanLatency = br.Latency()
- br.results <- br
- }
-
// UptimeData tracks when monitoring started and how much of the time since
// then we have been healthy.
type UptimeData struct {
	StartTime time.Time `json:"start_time"`
	Uptime    float64   `json:"uptime" wire:"unsafe"` // Percentage of time we've been Healthy, ever

	// Total downtime so far; only updated when we come back online.
	totalDownTime time.Duration
	// When we last went down.
	wentDown time.Time

	// TODO: uptime over last day, month, year
}
-
- func NewBlockchainStatus() *BlockchainStatus {
- return &BlockchainStatus{
- blockTimeMeter: metrics.NewMeter(),
- txThroughputMeter: metrics.NewMeter(),
- Healthy: true,
- UptimeData: &UptimeData{
- StartTime: time.Now(),
- Uptime: 100.0,
- },
- }
- }
-
- func (s *BlockchainStatus) NewBlock(block *tmtypes.Header) {
- s.mtx.Lock()
- defer s.mtx.Unlock()
- if block.Height > s.Height {
- numTxs := block.NumTxs
- s.Height = block.Height
- s.blockTimeMeter.Mark(1)
- s.txThroughputMeter.Mark(int64(numTxs))
- s.MeanBlockTime = (1.0 / s.blockTimeMeter.Rate1()) * 1000 // 1/s to ms
- s.TxThroughput = s.txThroughputMeter.Rate1()
-
- log.Debug("New Block", "height", s.Height, "ntxs", numTxs)
- if s.benchResults != nil && !s.benchResults.done {
- if s.benchResults.StartBlock == 0 && numTxs > 0 {
- s.benchResults.StartBlock = s.Height
- }
- s.benchResults.Blocks = append(s.benchResults.Blocks, &Block{
- Time: time.Now(),
- Height: s.Height,
- NumTxs: numTxs,
- })
- s.benchResults.NumTxs += numTxs
- s.benchResults.NumBlocks += 1
- if s.benchResults.nTxs > 0 && s.benchResults.NumTxs >= s.benchResults.nTxs {
- s.benchResults.Done()
- } else if s.benchResults.nBlocks > 0 && s.benchResults.NumBlocks >= s.benchResults.nBlocks {
- s.benchResults.Done()
- }
- }
-
- // if we're making blocks, we're healthy
- if !s.Healthy {
- s.Healthy = true
- s.UptimeData.totalDownTime += time.Since(s.UptimeData.wentDown)
- }
-
- // if we are connected to all validators, we're at full health
- // TODO: make sure they're all at the same height (within a block) and all proposing (and possibly validating )
- // Alternatively, just check there hasn't been a new round in numValidators rounds
- if s.ActiveValidators == s.NumValidators {
- s.FullHealth = true
- }
-
- // TODO: should we refactor so there's a central loop and ticker?
- go s.newBlockTimeout(s.Height)
- }
- }
-
- // we have newBlockTimeoutSeconds to make a new block, else we're unhealthy
- func (s *BlockchainStatus) newBlockTimeout(height int) {
- time.Sleep(time.Second * newBlockTimeoutSeconds)
-
- s.mtx.Lock()
- defer s.mtx.Unlock()
- if !(s.Height > height) {
- s.Healthy = false
- s.UptimeData.wentDown = time.Now()
- }
- }
-
- // Used to calculate uptime on demand. TODO: refactor this into the central loop ...
- func (s *BlockchainStatus) RealTimeUpdates() {
- s.mtx.Lock()
- defer s.mtx.Unlock()
- since := time.Since(s.UptimeData.StartTime)
- uptime := since - s.UptimeData.totalDownTime
- if !s.Healthy {
- uptime -= time.Since(s.UptimeData.wentDown)
- }
- s.UptimeData.Uptime = float64(uptime) / float64(since)
- }
-
- func (s *BlockchainStatus) UpdateLatency(oldLatency, newLatency float64) {
- s.mtx.Lock()
- defer s.mtx.Unlock()
-
- // update avg validator rpc latency
- mean := s.MeanLatency * float64(s.NumValidators)
- mean = (mean - oldLatency + newLatency) / float64(s.NumValidators)
- s.MeanLatency = mean
- }
-
- // Toggle validators online/offline (updates ActiveValidators and FullHealth)
- func (s *BlockchainStatus) SetOnline(val *ValidatorState, isOnline bool) {
- val.SetOnline(isOnline)
-
- var change int
- if isOnline {
- change = 1
- } else {
- change = -1
- }
-
- s.mtx.Lock()
- defer s.mtx.Unlock()
-
- s.ActiveValidators += change
-
- if s.ActiveValidators > s.NumValidators {
- panic(Fmt("got %d validators. max %ds", s.ActiveValidators, s.NumValidators))
- }
-
- // if we lost a connection we're no longer at full health, even if it's still online.
- // so long as we receive blocks, we'll know we're still healthy
- if s.ActiveValidators != s.NumValidators {
- s.FullHealth = false
- }
- }
-
// TwoThirdsMaj reports whether count is strictly more than two thirds of
// total.
func TwoThirdsMaj(count, total int) bool {
	const twoThirds = 2.0 / 3.0
	threshold := twoThirds * float64(total)
	return float64(count) > threshold
}
|