You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

243 lines
6.2 KiB

8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
internal/proxy: add initial set of abci metrics (#7115) This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods. An example of these metrics is included here for reference: ``` tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13 tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001 tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13 ``` These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. 
I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.
3 years ago
internal/proxy: add initial set of abci metrics (#7115) This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods. An example of these metrics is included here for reference: ``` tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13 tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001 tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13 ``` These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. 
I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.
3 years ago
8 years ago
8 years ago
8 years ago
internal/proxy: add initial set of abci metrics (#7115) This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods. An example of these metrics is included here for reference: ``` tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13 tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001 tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13 ``` These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. 
I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.
3 years ago
8 years ago
internal/proxy: add initial set of abci metrics (#7115) This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods. An example of these metrics is included here for reference: ``` tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13 tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001 tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13 ``` These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. 
I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.
3 years ago
internal/proxy: add initial set of abci metrics (#7115) This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods. An example of these metrics is included here for reference: ``` tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13 tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001 tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13 ``` These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. 
I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.
3 years ago
internal/proxy: add initial set of abci metrics (#7115) This PR adds an initial set of metrics for use ABCI. The initial metrics enable the calculation of timing histograms and call counts for each of the ABCI methods. The metrics are also labeled as either 'sync' or 'async' to determine if the method call was performed using ABCI's `*Async` methods. An example of these metrics is included here for reference: ``` tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0001"} 0 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.0004"} 5 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.002"} 12 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.009"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.02"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.1"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="0.65"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="2"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="6"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="25"} 13 tendermint_abci_connection_method_timing_bucket{chain_id="ci",method="commit",type="sync",le="+Inf"} 13 tendermint_abci_connection_method_timing_sum{chain_id="ci",method="commit",type="sync"} 0.007802058000000001 tendermint_abci_connection_method_timing_count{chain_id="ci",method="commit",type="sync"} 13 ``` These metrics can easily be graphed using prometheus's `histogram_quantile(...)` method to pick out a particular quantile to graph or examine. 
I chose buckets that roughly estimate the expected range of times for ABCI operations. They start at 0.0001 seconds and range up to 25 seconds. The hope is that this range captures enough possible times to be useful for us and operators.
3 years ago
  1. package proxy
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "os"
  7. "syscall"
  8. abciclient "github.com/tendermint/tendermint/abci/client"
  9. "github.com/tendermint/tendermint/libs/log"
  10. "github.com/tendermint/tendermint/libs/service"
  11. )
  12. const (
  13. connConsensus = "consensus"
  14. connMempool = "mempool"
  15. connQuery = "query"
  16. connSnapshot = "snapshot"
  17. )
  18. // AppConns is the Tendermint's interface to the application that consists of
  19. // multiple connections.
  20. type AppConns interface {
  21. service.Service
  22. // Mempool connection
  23. Mempool() AppConnMempool
  24. // Consensus connection
  25. Consensus() AppConnConsensus
  26. // Query connection
  27. Query() AppConnQuery
  28. // Snapshot connection
  29. Snapshot() AppConnSnapshot
  30. }
  31. // NewAppConns calls NewMultiAppConn.
  32. func NewAppConns(clientCreator abciclient.Creator, logger log.Logger, metrics *Metrics) AppConns {
  33. return NewMultiAppConn(clientCreator, logger, metrics)
  34. }
  35. // multiAppConn implements AppConns.
  36. //
  37. // A multiAppConn is made of a few appConns and manages their underlying abci
  38. // clients.
  39. // TODO: on app restart, clients must reboot together
  40. type multiAppConn struct {
  41. service.BaseService
  42. logger log.Logger
  43. metrics *Metrics
  44. consensusConn AppConnConsensus
  45. mempoolConn AppConnMempool
  46. queryConn AppConnQuery
  47. snapshotConn AppConnSnapshot
  48. consensusConnClient stoppableClient
  49. mempoolConnClient stoppableClient
  50. queryConnClient stoppableClient
  51. snapshotConnClient stoppableClient
  52. clientCreator abciclient.Creator
  53. }
  54. // TODO: this is a totally internal and quasi permanent shim for
  55. // clients. eventually we can have a single client and have some kind
  56. // of reasonable lifecycle witout needing an explicit stop method.
  57. type stoppableClient interface {
  58. abciclient.Client
  59. Stop() error
  60. }
  61. // NewMultiAppConn makes all necessary abci connections to the application.
  62. func NewMultiAppConn(clientCreator abciclient.Creator, logger log.Logger, metrics *Metrics) AppConns {
  63. multiAppConn := &multiAppConn{
  64. logger: logger,
  65. metrics: metrics,
  66. clientCreator: clientCreator,
  67. }
  68. multiAppConn.BaseService = *service.NewBaseService(logger, "multiAppConn", multiAppConn)
  69. return multiAppConn
  70. }
  71. func (app *multiAppConn) Mempool() AppConnMempool {
  72. return app.mempoolConn
  73. }
  74. func (app *multiAppConn) Consensus() AppConnConsensus {
  75. return app.consensusConn
  76. }
  77. func (app *multiAppConn) Query() AppConnQuery {
  78. return app.queryConn
  79. }
  80. func (app *multiAppConn) Snapshot() AppConnSnapshot {
  81. return app.snapshotConn
  82. }
  83. func (app *multiAppConn) OnStart(ctx context.Context) error {
  84. c, err := app.abciClientFor(ctx, connQuery)
  85. if err != nil {
  86. return err
  87. }
  88. app.queryConnClient = c.(stoppableClient)
  89. app.queryConn = NewAppConnQuery(c, app.metrics)
  90. c, err = app.abciClientFor(ctx, connSnapshot)
  91. if err != nil {
  92. app.stopAllClients()
  93. return err
  94. }
  95. app.snapshotConnClient = c.(stoppableClient)
  96. app.snapshotConn = NewAppConnSnapshot(c, app.metrics)
  97. c, err = app.abciClientFor(ctx, connMempool)
  98. if err != nil {
  99. app.stopAllClients()
  100. return err
  101. }
  102. app.mempoolConnClient = c.(stoppableClient)
  103. app.mempoolConn = NewAppConnMempool(c, app.metrics)
  104. c, err = app.abciClientFor(ctx, connConsensus)
  105. if err != nil {
  106. app.stopAllClients()
  107. return err
  108. }
  109. app.consensusConnClient = c.(stoppableClient)
  110. app.consensusConn = NewAppConnConsensus(c, app.metrics)
  111. // Kill Tendermint if the ABCI application crashes.
  112. app.startWatchersForClientErrorToKillTendermint(ctx)
  113. return nil
  114. }
  115. func (app *multiAppConn) OnStop() {
  116. app.stopAllClients()
  117. }
  118. func (app *multiAppConn) startWatchersForClientErrorToKillTendermint(ctx context.Context) {
  119. // this function starts a number of threads (per abci client)
  120. // that will SIGTERM's our own PID if any of the ABCI clients
  121. // exit/return early. If the context is canceled then these
  122. // functions will not kill tendermint.
  123. killFn := func(conn string, err error, logger log.Logger) {
  124. logger.Error(
  125. fmt.Sprintf("%s connection terminated. Did the application crash? Please restart tendermint", conn),
  126. "err", err)
  127. if killErr := kill(); killErr != nil {
  128. logger.Error("Failed to kill this process - please do so manually", "err", killErr)
  129. }
  130. }
  131. type op struct {
  132. connClient stoppableClient
  133. name string
  134. }
  135. for _, client := range []op{
  136. {
  137. connClient: app.consensusConnClient,
  138. name: connConsensus,
  139. },
  140. {
  141. connClient: app.mempoolConnClient,
  142. name: connMempool,
  143. },
  144. {
  145. connClient: app.queryConnClient,
  146. name: connQuery,
  147. },
  148. {
  149. connClient: app.snapshotConnClient,
  150. name: connSnapshot,
  151. },
  152. } {
  153. go func(name string, client stoppableClient) {
  154. client.Wait()
  155. if ctx.Err() != nil {
  156. return
  157. }
  158. if err := client.Error(); err != nil {
  159. killFn(name, err, app.logger)
  160. }
  161. }(client.name, client.connClient)
  162. }
  163. }
  164. func (app *multiAppConn) stopAllClients() {
  165. if app.consensusConnClient != nil {
  166. if err := app.consensusConnClient.Stop(); err != nil {
  167. if !errors.Is(err, service.ErrAlreadyStopped) {
  168. app.logger.Error("error while stopping consensus client", "error", err)
  169. }
  170. }
  171. }
  172. if app.mempoolConnClient != nil {
  173. if err := app.mempoolConnClient.Stop(); err != nil {
  174. if !errors.Is(err, service.ErrAlreadyStopped) {
  175. app.logger.Error("error while stopping mempool client", "error", err)
  176. }
  177. }
  178. }
  179. if app.queryConnClient != nil {
  180. if err := app.queryConnClient.Stop(); err != nil {
  181. if !errors.Is(err, service.ErrAlreadyStopped) {
  182. app.logger.Error("error while stopping query client", "error", err)
  183. }
  184. }
  185. }
  186. if app.snapshotConnClient != nil {
  187. if err := app.snapshotConnClient.Stop(); err != nil {
  188. if !errors.Is(err, service.ErrAlreadyStopped) {
  189. app.logger.Error("error while stopping snapshot client", "error", err)
  190. }
  191. }
  192. }
  193. }
  194. func (app *multiAppConn) abciClientFor(ctx context.Context, conn string) (abciclient.Client, error) {
  195. c, err := app.clientCreator(app.logger.With(
  196. "module", "abci-client",
  197. "connection", conn))
  198. if err != nil {
  199. return nil, fmt.Errorf("error creating ABCI client (%s connection): %w", conn, err)
  200. }
  201. if err := c.Start(ctx); err != nil {
  202. return nil, fmt.Errorf("error starting ABCI client (%s connection): %w", conn, err)
  203. }
  204. return c, nil
  205. }
  206. func kill() error {
  207. p, err := os.FindProcess(os.Getpid())
  208. if err != nil {
  209. return err
  210. }
  211. return p.Signal(syscall.SIGTERM)
  212. }