Browse Source

privval: retry GetPubKey/SignVote/SignProposal N times before

returning an error

Closes #4707
pull/4825/head
Anton Kaliaev 5 years ago
committed by GitHub
parent
commit
2afae13a48
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 114 additions and 7 deletions
  1. +7
    -2
      CHANGELOG.md
  2. +1
    -0
      CHANGELOG_PENDING.md
  3. +15
    -3
      node/node.go
  4. +2
    -2
      node/node_test.go
  5. +6
    -0
      privval/doc.go
  6. +83
    -0
      privval/retry_signer_client.go

+ 7
- 2
CHANGELOG.md View File

@ -42,7 +42,6 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
### BUG FIXES: ### BUG FIXES:
- [rpc] [\#4568](https://github.com/tendermint/tendermint/issues/4568) Fix panic when `Subscribe` is called, but HTTP client is not running. `Subscribe`, `Unsubscribe(All)` methods return an error now (@melekes). - [rpc] [\#4568](https://github.com/tendermint/tendermint/issues/4568) Fix panic when `Subscribe` is called, but HTTP client is not running. `Subscribe`, `Unsubscribe(All)` methods return an error now (@melekes).
## v0.33.3 ## v0.33.3
@ -339,6 +338,12 @@ subjectivity interface. Refer to the [spec](https://github.com/tendermint/spec/b
- [consensus/types] [\#4243](https://github.com/tendermint/tendermint/issues/4243) fix BenchmarkRoundStateDeepCopy panics (@cuonglm) - [consensus/types] [\#4243](https://github.com/tendermint/tendermint/issues/4243) fix BenchmarkRoundStateDeepCopy panics (@cuonglm)
- [rpc] [\#4256](https://github.com/tendermint/tendermint/issues/4256) Pass `outCapacity` to `eventBus#Subscribe` when subscribing using a local client - [rpc] [\#4256](https://github.com/tendermint/tendermint/issues/4256) Pass `outCapacity` to `eventBus#Subscribe` when subscribing using a local client
## v0.32.11
### BUG FIXES:
- [privval] [\#4275](https://github.com/tendermint/tendermint/issues/4275) Fix consensus failure when remote signer drops (@melekes)
## v0.32.10 ## v0.32.10
*April 6, 2020* *April 6, 2020*
@ -421,7 +426,7 @@ program](https://hackerone.com/tendermint).
### BUG FIXES: ### BUG FIXES:
- [rpc/lib] [\#4051](https://github.com/tendermint/tendermint/pull/4131) Fix RPC client, which was previously resolving https protocol to http (@yenkhoon)
- [rpc/lib] [\#4131](https://github.com/tendermint/tendermint/pull/4131) Fix RPC client, which was previously resolving https protocol to http (@yenkhoon)
- [cs] [\#4069](https://github.com/tendermint/tendermint/issues/4069) Don't panic when block meta is not found in store (@gregzaitsev) - [cs] [\#4069](https://github.com/tendermint/tendermint/issues/4069) Don't panic when block meta is not found in store (@gregzaitsev)
## v0.32.8 ## v0.32.8


+ 1
- 0
CHANGELOG_PENDING.md View File

@ -60,3 +60,4 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
- [light] [\#4741](https://github.com/tendermint/tendermint/pull/4741) Correctly return `ErrSignedHeaderNotFound` and `ErrValidatorSetNotFound` on corresponding RPC errors (@erikgrinaker) - [light] [\#4741](https://github.com/tendermint/tendermint/pull/4741) Correctly return `ErrSignedHeaderNotFound` and `ErrValidatorSetNotFound` on corresponding RPC errors (@erikgrinaker)
- [rpc] \#4805 Attempt to handle panics during panic recovery (@erikgrinaker) - [rpc] \#4805 Attempt to handle panics during panic recovery (@erikgrinaker)
- [types] [\#4764](https://github.com/tendermint/tendermint/pull/4764) Return an error if voting power overflows in `VerifyCommitTrusting` (@melekes) - [types] [\#4764](https://github.com/tendermint/tendermint/pull/4764) Return an error if voting power overflows in `VerifyCommitTrusting` (@melekes)
- [privval] [\#4812](https://github.com/tendermint/tendermint/pull/4812) Retry `GetPubKey/SignVote/SignProposal` a few times before returning an error (@melekes)

+ 15
- 3
node/node.go View File

@ -1310,15 +1310,27 @@ func createAndStartPrivValidatorSocketClient(
) (types.PrivValidator, error) { ) (types.PrivValidator, error) {
pve, err := privval.NewSignerListener(listenAddr, logger) pve, err := privval.NewSignerListener(listenAddr, logger)
if err != nil { if err != nil {
return nil, errors.Wrap(err, "failed to start private validator")
return nil, fmt.Errorf("failed to start private validator: %w", err)
} }
pvsc, err := privval.NewSignerClient(pve) pvsc, err := privval.NewSignerClient(pve)
if err != nil { if err != nil {
return nil, errors.Wrap(err, "failed to start private validator")
return nil, fmt.Errorf("failed to start private validator: %w", err)
} }
return pvsc, nil
// try to get a pubkey from the private validator for the first time
_, err = pvsc.GetPubKey()
if err != nil {
return nil, fmt.Errorf("can't get pubkey: %w", err)
}
const (
retries = 50 // 50 * 100ms = 5s total
timeout = 100 * time.Millisecond
)
pvscWithRetries := privval.NewRetrySignerClient(pvsc, retries, timeout)
return pvscWithRetries, nil
} }
// splitAndTrimEmpty slices s into all subslices separated by sep and returns a // splitAndTrimEmpty slices s into all subslices separated by sep and returns a


+ 2
- 2
node/node_test.go View File

@ -160,7 +160,7 @@ func TestNodeSetPrivValTCP(t *testing.T) {
n, err := DefaultNewNode(config, log.TestingLogger()) n, err := DefaultNewNode(config, log.TestingLogger())
require.NoError(t, err) require.NoError(t, err)
assert.IsType(t, &privval.SignerClient{}, n.PrivValidator())
assert.IsType(t, &privval.RetrySignerClient{}, n.PrivValidator())
} }
// address without a protocol must result in error // address without a protocol must result in error
@ -204,7 +204,7 @@ func TestNodeSetPrivValIPC(t *testing.T) {
n, err := DefaultNewNode(config, log.TestingLogger()) n, err := DefaultNewNode(config, log.TestingLogger())
require.NoError(t, err) require.NoError(t, err)
assert.IsType(t, &privval.SignerClient{}, n.PrivValidator())
assert.IsType(t, &privval.RetrySignerClient{}, n.PrivValidator())
} }
// testFreeAddr claims a free port so we don't block on listener being ready. // testFreeAddr claims a free port so we don't block on listener being ready.


+ 6
- 0
privval/doc.go View File

@ -19,5 +19,11 @@ SignerDialerEndpoint
SignerDialerEndpoint is a simple wrapper around a net.Conn. It's used by both IPCVal and TCPVal. SignerDialerEndpoint is a simple wrapper around a net.Conn. It's used by both IPCVal and TCPVal.
SignerClient
SignerClient handles remote validator connections that provide signing services.
In production, it's recommended to wrap it with RetrySignerClient to avoid
termination in case of temporary errors.
*/ */
package privval package privval

+ 83
- 0
privval/retry_signer_client.go View File

@ -0,0 +1,83 @@
package privval
import (
"fmt"
"time"
"github.com/tendermint/tendermint/crypto"
"github.com/tendermint/tendermint/types"
)
// RetrySignerClient wraps SignerClient adding retry for each operation (except
// Ping) w/ a timeout.
//
// The retried operations are GetPubKey, SignVote and SignProposal; Close,
// IsConnected, WaitForConnection and Ping are forwarded to the wrapped client
// with a single call.
type RetrySignerClient struct {
// next is the wrapped client that every method delegates to.
next *SignerClient
// retries bounds the number of attempts per retried operation; 0 means keep
// trying until the operation succeeds.
retries int
// timeout is how long to sleep after a failed attempt before trying again.
timeout time.Duration
}
// NewRetrySignerClient wraps sc in a RetrySignerClient.
//
// retries is the maximum number of attempts made for each retried operation
// (GetPubKey, SignVote, SignProposal); if it is 0, the client keeps retrying
// indefinitely. A negative value makes every retried operation fail without
// contacting the signer. timeout is the pause inserted after each failed
// attempt.
func NewRetrySignerClient(sc *SignerClient, retries int, timeout time.Duration) *RetrySignerClient {
	client := &RetrySignerClient{
		next:    sc,
		retries: retries,
		timeout: timeout,
	}
	return client
}
// Compile-time assertion that *RetrySignerClient satisfies types.PrivValidator.
var _ types.PrivValidator = (*RetrySignerClient)(nil)
// Close forwards to the wrapped SignerClient's Close and returns its error.
// No retry is performed.
func (sc *RetrySignerClient) Close() error {
return sc.next.Close()
}
// IsConnected reports whatever the wrapped SignerClient's IsConnected reports.
func (sc *RetrySignerClient) IsConnected() bool {
return sc.next.IsConnected()
}
// WaitForConnection forwards maxWait to the wrapped SignerClient's
// WaitForConnection and returns its error. The call is made once; the
// retry settings of this wrapper are not applied here.
func (sc *RetrySignerClient) WaitForConnection(maxWait time.Duration) error {
return sc.next.WaitForConnection(maxWait)
}
//--------------------------------------------------------
// Implement PrivValidator
// Ping forwards to the wrapped SignerClient's Ping exactly once; unlike
// GetPubKey, SignVote and SignProposal it is not retried.
func (sc *RetrySignerClient) Ping() error {
return sc.next.Ping()
}
// GetPubKey asks the wrapped SignerClient for the public key, retrying on
// failure.
//
// Up to sc.retries attempts are made (unbounded when sc.retries is 0), with a
// sleep of sc.timeout after every failed attempt. The first successful result
// is returned immediately. When all attempts fail, the returned error wraps
// the error from the last attempt. A negative sc.retries is rejected up front
// without contacting the signer.
func (sc *RetrySignerClient) GetPubKey() (crypto.PubKey, error) {
	// With a negative retry count the loop below would never run, leaving no
	// underlying error to wrap; report the misconfiguration explicitly instead
	// of emitting a malformed "%!w(<nil>)" message.
	if sc.retries < 0 {
		return nil, fmt.Errorf("can't get pubkey: negative number of retries (%d)", sc.retries)
	}

	var (
		pk  crypto.PubKey
		err error
	)
	for i := 0; i < sc.retries || sc.retries == 0; i++ {
		pk, err = sc.next.GetPubKey()
		if err == nil {
			return pk, nil
		}
		// Back off before the next attempt.
		time.Sleep(sc.timeout)
	}
	return nil, fmt.Errorf("exhausted all attempts to get pubkey: %w", err)
}
// SignVote asks the wrapped SignerClient to sign vote for chainID, retrying on
// failure.
//
// Up to sc.retries attempts are made (unbounded when sc.retries is 0), with a
// sleep of sc.timeout after every failed attempt. It returns nil as soon as an
// attempt succeeds. When all attempts fail, the returned error wraps the error
// from the last attempt. A negative sc.retries is rejected up front without
// contacting the signer.
func (sc *RetrySignerClient) SignVote(chainID string, vote *types.Vote) error {
	// With a negative retry count the loop below would never run, leaving no
	// underlying error to wrap; report the misconfiguration explicitly instead
	// of emitting a malformed "%!w(<nil>)" message.
	if sc.retries < 0 {
		return fmt.Errorf("can't sign vote: negative number of retries (%d)", sc.retries)
	}

	var err error
	for i := 0; i < sc.retries || sc.retries == 0; i++ {
		err = sc.next.SignVote(chainID, vote)
		if err == nil {
			return nil
		}
		// Back off before the next attempt.
		time.Sleep(sc.timeout)
	}
	return fmt.Errorf("exhausted all attempts to sign vote: %w", err)
}
// SignProposal asks the wrapped SignerClient to sign proposal for chainID,
// retrying on failure.
//
// Up to sc.retries attempts are made (unbounded when sc.retries is 0), with a
// sleep of sc.timeout after every failed attempt. It returns nil as soon as an
// attempt succeeds. When all attempts fail, the returned error wraps the error
// from the last attempt. A negative sc.retries is rejected up front without
// contacting the signer.
func (sc *RetrySignerClient) SignProposal(chainID string, proposal *types.Proposal) error {
	// With a negative retry count the loop below would never run, leaving no
	// underlying error to wrap; report the misconfiguration explicitly instead
	// of emitting a malformed "%!w(<nil>)" message.
	if sc.retries < 0 {
		return fmt.Errorf("can't sign proposal: negative number of retries (%d)", sc.retries)
	}

	var err error
	for i := 0; i < sc.retries || sc.retries == 0; i++ {
		err = sc.next.SignProposal(chainID, proposal)
		if err == nil {
			return nil
		}
		// Back off before the next attempt.
		time.Sleep(sc.timeout)
	}
	return fmt.Errorf("exhausted all attempts to sign proposal: %w", err)
}

Loading…
Cancel
Save