internal/proxy: add initial set of abci metrics (#7115)
This PR adds an initial set of metrics for use with ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to indicate whether the method call was performed using ABCI's `*Async` methods.
An example of these metrics is included here for reference:
```
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13
tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001
tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13
```
These metrics can easily be graphed using Prometheus's `histogram_quantile(...)` function to pick out a particular quantile to graph or examine. I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.
3 years ago internal/proxy: add initial set of abci metrics (#7115)
This PR adds an initial set of metrics for use with ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to indicate whether the method call was performed using ABCI's `*Async` methods.
An example of these metrics is included here for reference:
```
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13
tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13
tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001
tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13
```
These metrics can easily be graphed using Prometheus's `histogram_quantile(...)` function to pick out a particular quantile to graph or examine. I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.
3 years ago |
|
- package proxy
-
- import (
- "errors"
- "os"
- "os/signal"
- "syscall"
- "testing"
- "time"
-
- "github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/mock"
- "github.com/stretchr/testify/require"
-
- abciclient "github.com/tendermint/tendermint/abci/client"
- abcimocks "github.com/tendermint/tendermint/abci/client/mocks"
- )
-
- func TestAppConns_Start_Stop(t *testing.T) {
- quitCh := make(<-chan struct{})
-
- clientMock := &abcimocks.Client{}
- clientMock.On("SetLogger", mock.Anything).Return().Times(4)
- clientMock.On("Start").Return(nil).Times(4)
- clientMock.On("Stop").Return(nil).Times(4)
- clientMock.On("Quit").Return(quitCh).Times(4)
-
- creatorCallCount := 0
- creator := func() (abciclient.Client, error) {
- creatorCallCount++
- return clientMock, nil
- }
-
- appConns := NewAppConns(creator, NopMetrics())
-
- err := appConns.Start()
- require.NoError(t, err)
-
- time.Sleep(100 * time.Millisecond)
-
- err = appConns.Stop()
- require.NoError(t, err)
-
- clientMock.AssertExpectations(t)
- assert.Equal(t, 4, creatorCallCount)
- }
-
- // Upon failure, we call tmos.Kill
- func TestAppConns_Failure(t *testing.T) {
- ok := make(chan struct{})
- c := make(chan os.Signal, 1)
- signal.Notify(c, syscall.SIGTERM)
- go func() {
- for range c {
- close(ok)
- }
- }()
-
- quitCh := make(chan struct{})
- var recvQuitCh <-chan struct{} // nolint:gosimple
- recvQuitCh = quitCh
-
- clientMock := &abcimocks.Client{}
- clientMock.On("SetLogger", mock.Anything).Return()
- clientMock.On("Start").Return(nil)
- clientMock.On("Stop").Return(nil)
-
- clientMock.On("Quit").Return(recvQuitCh)
- clientMock.On("Error").Return(errors.New("EOF")).Once()
-
- creator := func() (abciclient.Client, error) {
- return clientMock, nil
- }
-
- appConns := NewAppConns(creator, NopMetrics())
-
- err := appConns.Start()
- require.NoError(t, err)
- t.Cleanup(func() {
- if err := appConns.Stop(); err != nil {
- t.Error(err)
- }
- })
-
- // simulate failure
- close(quitCh)
-
- select {
- case <-ok:
- t.Log("SIGTERM successfully received")
- case <-time.After(5 * time.Second):
- t.Fatal("expected process to receive SIGTERM signal")
- }
- }
|