From 7368ba358b0508ad5f37b6066fe9dcbef098e51a Mon Sep 17 00:00:00 2001 From: Marko Date: Thu, 19 Dec 2019 18:04:57 +0100 Subject: [PATCH] prometheus/metrics: three new metrics for consensus (#4263) * prometheus/metrics: two new metrics for consensus - add consensus_validator_power metric so if a node is a validator it can see its own power in prometheus - add last_signed_height metric so if a node is a validator it can be aware of at which height the most recent time it signed. - closes: #3773 - closes: #3083 Signed-off-by: Marko Baricevic * check if signature is present * minor change and check if sig is not absent * add changelog entry * Update consensus/state.go Co-Authored-By: Anton Kaliaev * Update CHANGELOG_PENDING.md Co-Authored-By: Anton Kaliaev * Update CHANGELOG_PENDING.md * add label with validator address * change address to vali_address * add metric missed blocks * add changelog entry for missed blocks metric * address naming & add docs --- CHANGELOG_PENDING.md | 5 +++ consensus/metrics.go | 29 ++++++++++++++++ consensus/state.go | 13 ++++++++ docs/tendermint-core/metrics.md | 59 +++++++++++++++++---------------- 4 files changed, 78 insertions(+), 28 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index f1c0a7910..d28c727b9 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -113,6 +113,11 @@ program](https://hackerone.com/tendermint). 
- [cli] \#4234 Add `--db_backend and --db_dir` flags (@princesinha19) - [cli] \#4113 Add optional `--genesis_hash` flag to check genesis hash upon startup - [config] \#3831 Add support for [RocksDB](https://rocksdb.org/) (@Stumble) +- [metrics] \#4263 Add + - `consensus_validator_power`: track your validator's power + - `consensus_validator_last_signed_height`: track at which height the validator last signed + - `consensus_validator_missed_blocks`: total number of missed blocks for a validator + as gauges in Prometheus for validator-specific metrics ### IMPROVEMENTS: diff --git a/consensus/metrics.go b/consensus/metrics.go index b5207742c..5fa27118a 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -19,6 +19,9 @@ type Metrics struct { // Height of the chain. Height metrics.Gauge + // ValidatorLastSignedHeight of a validator. + ValidatorLastSignedHeight metrics.Gauge + // Number of rounds. Rounds metrics.Gauge @@ -26,6 +29,10 @@ type Metrics struct { Validators metrics.Gauge // Total power of all validators. ValidatorsPower metrics.Gauge + // Power of a validator. + ValidatorPower metrics.Gauge + // Amount of blocks missed by a validator. + ValidatorMissedBlocks metrics.Gauge // Number of validators who did not sign. MissingValidators metrics.Gauge // Total power of the missing validators. 
@@ -81,12 +88,30 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "validators", Help: "Number of validators.", }, labels).With(labelsAndValues...), + ValidatorLastSignedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_last_signed_height", + Help: "Last signed height for a validator", + }, append(labels, "validator_address")).With(labelsAndValues...), + ValidatorMissedBlocks: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_missed_blocks", + Help: "Total missed blocks for a validator", + }, append(labels, "validator_address")).With(labelsAndValues...), ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "validators_power", Help: "Total power of all validators.", }, labels).With(labelsAndValues...), + ValidatorPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_power", + Help: "Power of a validator", + }, append(labels, "validator_address")).With(labelsAndValues...), MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -163,10 +188,14 @@ func NopMetrics() *Metrics { return &Metrics{ Height: discard.NewGauge(), + ValidatorLastSignedHeight: discard.NewGauge(), + Rounds: discard.NewGauge(), Validators: discard.NewGauge(), ValidatorsPower: discard.NewGauge(), + ValidatorPower: discard.NewGauge(), + ValidatorMissedBlocks: discard.NewGauge(), MissingValidators: discard.NewGauge(), MissingValidatorsPower: discard.NewGauge(), ByzantineValidators: discard.NewGauge(), diff --git a/consensus/state.go b/consensus/state.go index 44bb54382..b5fb1c0d1 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1486,6 +1486,19 @@ func (cs *State) recordMetrics(height int64, 
block *types.Block) { for i, val := range cs.LastValidators.Validators { commitSig := block.LastCommit.Signatures[i] + privValAddress := cs.privValidator.GetPubKey().Address() + if cs.privValidator != nil && bytes.Equal(val.Address, privValAddress) { + label := []string{ + "validator_address", privValAddress.String(), + } + cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) + if !commitSig.Absent() { + cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) + } else { + cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) + } + } + if commitSig.Absent() { missingValidators++ missingValidatorsPower += val.VotingPower diff --git a/docs/tendermint-core/metrics.md b/docs/tendermint-core/metrics.md index b2d143369..c6ac6c3a7 100644 --- a/docs/tendermint-core/metrics.md +++ b/docs/tendermint-core/metrics.md @@ -18,34 +18,37 @@ Listen address can be changed in the config file (see The following metrics are available: -| **Name** | **Type** | **Since** | **Tags** | **Description** | -| --------------------------------------- | --------- | --------- | -------------- | --------------------------------------------------------------- | -| consensus\_height | Gauge | 0.21.0 | | Height of the chain | -| consensus\_validators | Gauge | 0.21.0 | | Number of validators | -| consensus\_validators\_power | Gauge | 0.21.0 | | Total voting power of all validators | -| consensus\_missing\_validators | Gauge | 0.21.0 | | Number of validators who did not sign | -| consensus\_missing\_validators\_power | Gauge | 0.21.0 | | Total voting power of the missing validators | -| consensus\_byzantine\_validators | Gauge | 0.21.0 | | Number of validators who tried to double sign | -| consensus\_byzantine\_validators\_power | Gauge | 0.21.0 | | Total voting power of the byzantine validators | -| consensus\_block\_interval\_seconds | Histogram | 0.21.0 | | Time between this and last block (Block.Header.Time) in seconds | -| consensus\_rounds | Gauge | 
0.21.0 | | Number of rounds | -| consensus\_num\_txs | Gauge | 0.21.0 | | Number of transactions | -| consensus\_total\_txs | Gauge | 0.21.0 | | Total number of transactions committed | -| consensus\_block\_parts | counter | on dev | peer\_id | number of blockparts transmitted by peer | -| consensus\_latest\_block\_height | gauge | on dev | | /status sync\_info number | -| consensus\_fast\_syncing | gauge | on dev | | either 0 (not fast syncing) or 1 (syncing) | -| consensus\_block\_size\_bytes | Gauge | 0.21.0 | | Block size in bytes | -| p2p\_peers | Gauge | 0.21.0 | | Number of peers node's connected to | -| p2p\_peer\_receive\_bytes\_total | counter | on dev | peer\_id, chID | number of bytes per channel received from a given peer | -| p2p\_peer\_send\_bytes\_total | counter | on dev | peer\_id, chID | number of bytes per channel sent to a given peer | -| p2p\_peer\_pending\_send\_bytes | gauge | on dev | peer\_id | number of pending bytes to be sent to a given peer | -| p2p\_num\_txs | gauge | on dev | peer\_id | number of transactions submitted by each peer\_id | -| p2p\_pending\_send\_bytes | gauge | on dev | peer\_id | amount of data pending to be sent to peer | -| mempool\_size | Gauge | 0.21.0 | | Number of uncommitted transactions | -| mempool\_tx\_size\_bytes | histogram | on dev | | transaction sizes in bytes | -| mempool\_failed\_txs | counter | on dev | | number of failed transactions | -| mempool\_recheck\_times | counter | on dev | | number of transactions rechecked in the mempool | -| state\_block\_processing\_time | histogram | on dev | | time between BeginBlock and EndBlock in ms | +| **Name** | **Type** | **Since** | **Tags** | **Description** | +| -------------------------------------- | --------- | --------- | ------------- | ---------------------------------------------------------------------- | +| consensus_height | Gauge | 0.21.0 | | Height of the chain | +| consensus_validators | Gauge | 0.21.0 | | Number of validators | +| 
consensus_validators_power | Gauge | 0.21.0 | | Total voting power of all validators | +| consensus_validator_power | Gauge | 0.33.0 | validator_address | Voting power of the node if in the validator set | +| consensus_validator_last_signed_height | Gauge | 0.33.0 | validator_address | Last height the node signed a block, if the node is a validator | +| consensus_validator_missed_blocks | Gauge | 0.33.0 | validator_address | Total number of blocks missed for the node, if the node is a validator | +| consensus_missing_validators | Gauge | 0.21.0 | | Number of validators who did not sign | +| consensus_missing_validators_power | Gauge | 0.21.0 | | Total voting power of the missing validators | +| consensus_byzantine_validators | Gauge | 0.21.0 | | Number of validators who tried to double sign | +| consensus_byzantine_validators_power | Gauge | 0.21.0 | | Total voting power of the byzantine validators | +| consensus_block_interval_seconds | Histogram | 0.21.0 | | Time between this and last block (Block.Header.Time) in seconds | +| consensus_rounds | Gauge | 0.21.0 | | Number of rounds | +| consensus_num_txs | Gauge | 0.21.0 | | Number of transactions | +| consensus_total_txs | Gauge | 0.21.0 | | Total number of transactions committed | +| consensus_block_parts | counter | on dev | peer_id | number of blockparts transmitted by peer | +| consensus_latest_block_height | gauge | on dev | | /status sync_info number | +| consensus_fast_syncing | gauge | on dev | | either 0 (not fast syncing) or 1 (syncing) | +| consensus_block_size_bytes | Gauge | 0.21.0 | | Block size in bytes | +| p2p_peers | Gauge | 0.21.0 | | Number of peers node's connected to | +| p2p_peer_receive_bytes_total | counter | on dev | peer_id, chID | number of bytes per channel received from a given peer | +| p2p_peer_send_bytes_total | counter | on dev | peer_id, chID | number of bytes per channel sent to a given peer | +| p2p_peer_pending_send_bytes | gauge | on dev | peer_id | number of pending bytes to be sent to a given peer | +| p2p_num_txs | gauge | on 
dev | peer_id | number of transactions submitted by each peer_id | +| p2p_pending_send_bytes | gauge | on dev | peer_id | amount of data pending to be sent to peer | +| mempool_size | Gauge | 0.21.0 | | Number of uncommitted transactions | +| mempool_tx_size_bytes | histogram | on dev | | transaction sizes in bytes | +| mempool_failed_txs | counter | on dev | | number of failed transactions | +| mempool_recheck_times | counter | on dev | | number of transactions rechecked in the mempool | +| state_block_processing_time | histogram | on dev | | time between BeginBlock and EndBlock in ms | ## Useful queries