diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c03c90f4..e1375bd5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## v0.32.11 + +### BUG FIXES: + +- [privval] [\#4275](https://github.com/tendermint/tendermint/issues/4275) Fix consensus failure when remote signer drops (@melekes) + ## v0.32.10 *April 6, 2020* @@ -84,7 +90,7 @@ program](https://hackerone.com/tendermint). ### BUG FIXES: -- [rpc/lib] [\#4051](https://github.com/tendermint/tendermint/pull/4131) Fix RPC client, which was previously resolving https protocol to http (@yenkhoon) +- [rpc/lib] [\#4131](https://github.com/tendermint/tendermint/pull/4131) Fix RPC client, which was previously resolving https protocol to http (@yenkhoon) - [cs] [\#4069](https://github.com/tendermint/tendermint/issues/4069) Don't panic when block meta is not found in store (@gregzaitsev) ## v0.32.8 diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 4c57daaa2..abc908051 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -1,4 +1,4 @@ -## v0.32.11 +## v0.32.12 \*\* diff --git a/node/node.go b/node/node.go index 0ea88cfc6..234ebdbfc 100644 --- a/node/node.go +++ b/node/node.go @@ -1192,7 +1192,19 @@ func createAndStartPrivValidatorSocketClient( return nil, errors.Wrap(err, "failed to start private validator") } - return pvsc, nil + // try to get a pubkey from private validate first time + pubKey := pvsc.GetPubKey() + if pubKey == nil { + return nil, errors.New("could not retrieve public key from private validator") + } + + const ( + retries = 50 // 50 * 100ms = 5s total + timeout = 100 * time.Millisecond + ) + pvscWithRetries := privval.NewRetrySignerClient(pvsc, retries, timeout) + + return pvscWithRetries, nil } // splitAndTrimEmpty slices s into all subslices separated by sep and returns a diff --git a/node/node_test.go b/node/node_test.go index f93fcd2b0..7b869b104 100644 --- a/node/node_test.go +++ b/node/node_test.go @@ -158,7 +158,7 @@ func TestNodeSetPrivValTCP(t *testing.T) { n, err := DefaultNewNode(config, log.TestingLogger()) require.NoError(t, err) - assert.IsType(t, &privval.SignerClient{}, n.PrivValidator()) + assert.IsType(t, &privval.RetrySignerClient{}, n.PrivValidator()) } // address without a protocol must result in error @@ -202,7 +202,7 @@ func TestNodeSetPrivValIPC(t *testing.T) { n, err := DefaultNewNode(config, log.TestingLogger()) require.NoError(t, err) - assert.IsType(t, &privval.SignerClient{}, n.PrivValidator()) + assert.IsType(t, &privval.RetrySignerClient{}, n.PrivValidator()) } // testFreeAddr claims a free port so we don't block on listener being ready. diff --git a/privval/retry_signer_client.go b/privval/retry_signer_client.go new file mode 100644 index 000000000..8c08b46ba --- /dev/null +++ b/privval/retry_signer_client.go @@ -0,0 +1,77 @@ +package privval + +import ( + "errors" + "time" + + "github.com/tendermint/tendermint/crypto" + "github.com/tendermint/tendermint/types" +) + +// RetrySignerClient wraps SignerClient adding retry for each operation (except +// Ping) w/ a timeout. +type RetrySignerClient struct { + next *SignerClient + retries int + timeout time.Duration +} + +// NewRetrySignerClient returns RetrySignerClient. If +retries+ is 0, the +// client will be retrying each operation indefinitely. +func NewRetrySignerClient(sc *SignerClient, retries int, timeout time.Duration) *RetrySignerClient { + return &RetrySignerClient{sc, retries, timeout} +} + +var _ types.PrivValidator = (*RetrySignerClient)(nil) + +func (sc *RetrySignerClient) Close() error { + return sc.next.Close() +} + +func (sc *RetrySignerClient) IsConnected() bool { + return sc.next.IsConnected() +} + +func (sc *RetrySignerClient) WaitForConnection(maxWait time.Duration) error { + return sc.next.WaitForConnection(maxWait) +} + +//-------------------------------------------------------- +// Implement PrivValidator + +func (sc *RetrySignerClient) Ping() error { + return sc.next.Ping() +} + +func (sc *RetrySignerClient) GetPubKey() crypto.PubKey { + for i := 0; i < sc.retries || sc.retries == 0; i++ { + pk := sc.next.GetPubKey() + if pk != nil { + return pk + } + time.Sleep(sc.timeout) + } + return nil +} + +func (sc *RetrySignerClient) SignVote(chainID string, vote *types.Vote) error { + for i := 0; i < sc.retries || sc.retries == 0; i++ { + err := sc.next.SignVote(chainID, vote) + if err == nil { + return nil + } + time.Sleep(sc.timeout) + } + return errors.New("exhausted all attempts to sign vote") +} + +func (sc *RetrySignerClient) SignProposal(chainID string, proposal *types.Proposal) error { + for i := 0; i < sc.retries || sc.retries == 0; i++ { + err := sc.next.SignProposal(chainID, proposal) + if err == nil { + return nil + } + time.Sleep(sc.timeout) + } + return errors.New("exhausted all attempts to sign proposal") +} diff --git a/version/version.go b/version/version.go index c748054a6..143ee352c 100644 --- a/version/version.go +++ b/version/version.go @@ -21,7 +21,7 @@ const ( // XXX: Don't change the name of this variable or you will break // automation :) - TMCoreSemVer = "0.32.10" + TMCoreSemVer = "0.32.11" // ABCISemVer is the semantic version of the ABCI library ABCISemVer = "0.16.1"