Browse Source

p2p: Per channel metrics (#3666) (#3677)

* Add `chID` label to send/receive byte metrics
* add changelog pending entry
pull/3720/head
Sean Braithwaite 5 years ago
committed by Alexander Simmerl
parent
commit
c9ef824ddf
4 changed files with 46 additions and 33 deletions
  1. +1
    -0
      CHANGELOG_PENDING.md
  2. +28
    -28
      docs/tendermint-core/metrics.md
  3. +2
    -2
      p2p/metrics.go
  4. +15
    -3
      p2p/peer.go

+ 1
- 0
CHANGELOG_PENDING.md View File

@ -22,5 +22,6 @@
### FEATURES:
### IMPROVEMENTS:
- [p2p] \#3666 Add per-channel telemetry to improve reactor observability
### BUG FIXES:

+ 28
- 28
docs/tendermint-core/metrics.md View File

@ -14,34 +14,34 @@ Listen address can be changed in the config file (see
The following metrics are available:
| **Name** | **Type** | **Since** | **Tags** | **Description** |
|-----------------------------------------|-----------|-----------|----------|-----------------------------------------------------------------|
| consensus\_height | Gauge | 0.21.0 | | Height of the chain |
| consensus\_validators | Gauge | 0.21.0 | | Number of validators |
| consensus\_validators\_power | Gauge | 0.21.0 | | Total voting power of all validators |
| consensus\_missing\_validators | Gauge | 0.21.0 | | Number of validators who did not sign |
| consensus\_missing\_validators\_power | Gauge | 0.21.0 | | Total voting power of the missing validators |
| consensus\_byzantine\_validators | Gauge | 0.21.0 | | Number of validators who tried to double sign |
| consensus\_byzantine\_validators\_power | Gauge | 0.21.0 | | Total voting power of the byzantine validators |
| consensus\_block\_interval\_seconds | Histogram | 0.21.0 | | Time between this and last block (Block.Header.Time) in seconds |
| consensus\_rounds | Gauge | 0.21.0 | | Number of rounds |
| consensus\_num\_txs | Gauge | 0.21.0 | | Number of transactions |
| consensus\_block\_parts | counter | on dev | peer\_id | number of blockparts transmitted by peer |
| consensus\_latest\_block\_height | gauge | on dev | | /status sync\_info number |
| consensus\_fast\_syncing | gauge | on dev | | either 0 (not fast syncing) or 1 (syncing) |
| consensus\_total\_txs | Gauge | 0.21.0 | | Total number of transactions committed |
| consensus\_block\_size\_bytes | Gauge | 0.21.0 | | Block size in bytes |
| p2p\_peers | Gauge | 0.21.0 | | Number of peers node's connected to |
| p2p\_peer\_receive\_bytes\_total | counter | on dev | peer\_id | number of bytes received from a given peer |
| p2p\_peer\_send\_bytes\_total | counter | on dev | peer\_id | number of bytes sent to a given peer |
| p2p\_peer\_pending\_send\_bytes | gauge | on dev | peer\_id | number of pending bytes to be sent to a given peer |
| p2p\_num\_txs | gauge | on dev | peer\_id | number of transactions submitted by each peer\_id |
| p2p\_pending\_send\_bytes | gauge | on dev | peer\_id | amount of data pending to be sent to peer |
| mempool\_size | Gauge | 0.21.0 | | Number of uncommitted transactions |
| mempool\_tx\_size\_bytes | histogram | on dev | | transaction sizes in bytes |
| mempool\_failed\_txs | counter | on dev | | number of failed transactions |
| mempool\_recheck\_times | counter | on dev | | number of transactions rechecked in the mempool |
| state\_block\_processing\_time | histogram | on dev | | time between BeginBlock and EndBlock in ms |
| **Name** | **Type** | **Since** | **Tags** | **Description** |
|-----------------------------------------|-----------|-----------|----------------|-----------------------------------------------------------------|
| consensus\_height | Gauge | 0.21.0 | | Height of the chain |
| consensus\_validators | Gauge | 0.21.0 | | Number of validators |
| consensus\_validators\_power | Gauge | 0.21.0 | | Total voting power of all validators |
| consensus\_missing\_validators | Gauge | 0.21.0 | | Number of validators who did not sign |
| consensus\_missing\_validators\_power | Gauge | 0.21.0 | | Total voting power of the missing validators |
| consensus\_byzantine\_validators | Gauge | 0.21.0 | | Number of validators who tried to double sign |
| consensus\_byzantine\_validators\_power | Gauge | 0.21.0 | | Total voting power of the byzantine validators |
| consensus\_block\_interval\_seconds | Histogram | 0.21.0 | | Time between this and last block (Block.Header.Time) in seconds |
| consensus\_rounds | Gauge | 0.21.0 | | Number of rounds |
| consensus\_num\_txs | Gauge | 0.21.0 | | Number of transactions |
| consensus\_block\_parts | counter | on dev | peer\_id | number of blockparts transmitted by peer |
| consensus\_latest\_block\_height | gauge | on dev | | /status sync\_info number |
| consensus\_fast\_syncing | gauge | on dev | | either 0 (not fast syncing) or 1 (syncing) |
| consensus\_total\_txs | Gauge | 0.21.0 | | Total number of transactions committed |
| consensus\_block\_size\_bytes | Gauge | 0.21.0 | | Block size in bytes |
| p2p\_peers | Gauge | 0.21.0 | | Number of peers the node is connected to |
| p2p\_peer\_receive\_bytes\_total | counter | on dev | peer\_id, chID | number of bytes per channel received from a given peer |
| p2p\_peer\_send\_bytes\_total | counter | on dev | peer\_id, chID | number of bytes per channel sent to a given peer |
| p2p\_peer\_pending\_send\_bytes | gauge | on dev | peer\_id | number of pending bytes to be sent to a given peer |
| p2p\_num\_txs | gauge | on dev | peer\_id | number of transactions submitted by each peer\_id |
| p2p\_pending\_send\_bytes | gauge | on dev | peer\_id | amount of data pending to be sent to peer |
| mempool\_size | Gauge | 0.21.0 | | Number of uncommitted transactions |
| mempool\_tx\_size\_bytes | histogram | on dev | | transaction sizes in bytes |
| mempool\_failed\_txs | counter | on dev | | number of failed transactions |
| mempool\_recheck\_times | counter | on dev | | number of transactions rechecked in the mempool |
| state\_block\_processing\_time | histogram | on dev | | time between BeginBlock and EndBlock in ms |
## Useful queries


+ 2
- 2
p2p/metrics.go View File

@ -47,13 +47,13 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Subsystem: MetricsSubsystem,
Name: "peer_receive_bytes_total",
Help: "Number of bytes received from a given peer.",
}, append(labels, "peer_id")).With(labelsAndValues...),
}, append(labels, "peer_id", "chID")).With(labelsAndValues...),
PeerSendBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "peer_send_bytes_total",
Help: "Number of bytes sent to a given peer.",
}, append(labels, "peer_id")).With(labelsAndValues...),
}, append(labels, "peer_id", "chID")).With(labelsAndValues...),
PeerPendingSendBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,


+ 15
- 3
p2p/peer.go View File

@ -248,7 +248,11 @@ func (p *peer) Send(chID byte, msgBytes []byte) bool {
}
res := p.mconn.Send(chID, msgBytes)
if res {
p.metrics.PeerSendBytesTotal.With("peer_id", string(p.ID())).Add(float64(len(msgBytes)))
labels := []string{
"peer_id", string(p.ID()),
"chID", fmt.Sprintf("%#x", chID),
}
p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes)))
}
return res
}
@ -263,7 +267,11 @@ func (p *peer) TrySend(chID byte, msgBytes []byte) bool {
}
res := p.mconn.TrySend(chID, msgBytes)
if res {
p.metrics.PeerSendBytesTotal.With("peer_id", string(p.ID())).Add(float64(len(msgBytes)))
labels := []string{
"peer_id", string(p.ID()),
"chID", fmt.Sprintf("%#x", chID),
}
p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes)))
}
return res
}
@ -369,7 +377,11 @@ func createMConnection(
// which does onPeerError.
panic(fmt.Sprintf("Unknown channel %X", chID))
}
p.metrics.PeerReceiveBytesTotal.With("peer_id", string(p.ID())).Add(float64(len(msgBytes)))
labels := []string{
"peer_id", string(p.ID()),
"chID", fmt.Sprintf("%#x", chID),
}
p.metrics.PeerReceiveBytesTotal.With(labels...).Add(float64(len(msgBytes)))
reactor.Receive(chID, p, msgBytes)
}


Loading…
Cancel
Save