You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

131 lines
3.4 KiB

8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
internal/proxy: add initial set of abci metrics (#7115) This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods. An example of these metrics is included here for reference: ``` tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13 tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001 tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13 ``` These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. 
I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough of the possible times to be useful for us and for operators.
3 years ago
internal/proxy: add initial set of abci metrics (#7115) This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods. An example of these metrics is included here for reference: ``` tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13 tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001 tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13 ``` These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. 
I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough of the possible times to be useful for us and for operators.
3 years ago
8 years ago
8 years ago
  1. package proxy
  2. import (
  3. "context"
  4. "os"
  5. "syscall"
  6. abciclient "github.com/tendermint/tendermint/abci/client"
  7. "github.com/tendermint/tendermint/libs/log"
  8. "github.com/tendermint/tendermint/libs/service"
  9. )
  10. // AppConns is the Tendermint's interface to the application that consists of
  11. // multiple connections.
  12. type AppConns interface {
  13. service.Service
  14. // Mempool connection
  15. Mempool() AppConnMempool
  16. // Consensus connection
  17. Consensus() AppConnConsensus
  18. // Query connection
  19. Query() AppConnQuery
  20. // Snapshot connection
  21. Snapshot() AppConnSnapshot
  22. }
  23. // NewAppConns calls NewMultiAppConn.
  24. func NewAppConns(clientCreator abciclient.Creator, logger log.Logger, metrics *Metrics) AppConns {
  25. return NewMultiAppConn(clientCreator, logger, metrics)
  26. }
  27. // multiAppConn implements AppConns.
  28. //
  29. // A multiAppConn is made of a few appConns and manages their underlying abci
  30. // clients.
  31. // TODO: on app restart, clients must reboot together
  32. type multiAppConn struct {
  33. service.BaseService
  34. logger log.Logger
  35. metrics *Metrics
  36. consensusConn AppConnConsensus
  37. mempoolConn AppConnMempool
  38. queryConn AppConnQuery
  39. snapshotConn AppConnSnapshot
  40. client stoppableClient
  41. clientCreator abciclient.Creator
  42. }
  43. // TODO: this is a totally internal and quasi permanent shim for
  44. // clients. eventually we can have a single client and have some kind
  45. // of reasonable lifecycle witout needing an explicit stop method.
  46. type stoppableClient interface {
  47. abciclient.Client
  48. Stop()
  49. }
  50. // NewMultiAppConn makes all necessary abci connections to the application.
  51. func NewMultiAppConn(clientCreator abciclient.Creator, logger log.Logger, metrics *Metrics) AppConns {
  52. multiAppConn := &multiAppConn{
  53. logger: logger,
  54. metrics: metrics,
  55. clientCreator: clientCreator,
  56. }
  57. multiAppConn.BaseService = *service.NewBaseService(logger, "multiAppConn", multiAppConn)
  58. return multiAppConn
  59. }
  60. func (app *multiAppConn) Mempool() AppConnMempool { return app.mempoolConn }
  61. func (app *multiAppConn) Consensus() AppConnConsensus { return app.consensusConn }
  62. func (app *multiAppConn) Query() AppConnQuery { return app.queryConn }
  63. func (app *multiAppConn) Snapshot() AppConnSnapshot { return app.snapshotConn }
  64. func (app *multiAppConn) OnStart(ctx context.Context) error {
  65. var err error
  66. defer func() {
  67. if err != nil {
  68. app.client.Stop()
  69. }
  70. }()
  71. var client abciclient.Client
  72. client, err = app.clientCreator(app.logger)
  73. if err != nil {
  74. return err
  75. }
  76. app.queryConn = NewAppConnQuery(client, app.metrics)
  77. app.snapshotConn = NewAppConnSnapshot(client, app.metrics)
  78. app.mempoolConn = NewAppConnMempool(client, app.metrics)
  79. app.consensusConn = NewAppConnConsensus(client, app.metrics)
  80. app.client = client.(stoppableClient)
  81. // Kill Tendermint if the ABCI application crashes.
  82. go func() {
  83. if !client.IsRunning() {
  84. return
  85. }
  86. app.client.Wait()
  87. if ctx.Err() != nil {
  88. return
  89. }
  90. if err := app.client.Error(); err != nil {
  91. app.logger.Error("client connection terminated. Did the application crash? Please restart tendermint",
  92. "err", err)
  93. if killErr := kill(); killErr != nil {
  94. app.logger.Error("Failed to kill this process - please do so manually",
  95. "err", killErr)
  96. }
  97. }
  98. }()
  99. return client.Start(ctx)
  100. }
  101. func (app *multiAppConn) OnStop() { app.client.Stop() }
  102. func kill() error {
  103. p, err := os.FindProcess(os.Getpid())
  104. if err != nil {
  105. return err
  106. }
  107. return p.Signal(syscall.SIGTERM)
  108. }