Browse Source

consensus: double-sign risk reduction (ADR-51) (#5147)

Implementation spec of Double Signing Risk Reduction [ADR-51](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-051-double-signing-risk-reduction.md) by B-Harvest
- Add `DoubleSignCheckHeight` config variable to ConsensusConfig for "How many blocks looks back to check existence of the node's consensus votes when before joining consensus"
- Add `consensus.double_sign_check_height` to `config.toml` and `tendermint node` as flag for set `DoubleSignCheckHeight`
- Set default `consensus.double_sign_check_height` to `0`  ( it could be adjustable in this PR, disable when 0  )

Refs

- [ADR-51](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-051-double-signing-risk-reduction.md)
- [https://github.com/tendermint/tendermint/issues/4059](https://github.com/tendermint/tendermint/issues/4059)
- [https://github.com/tendermint/tendermint/pull/4262](https://github.com/tendermint/tendermint/pull/4262)
pull/5297/head
dongsam 4 years ago
committed by GitHub
parent
commit
e30b125725
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 214 additions and 13 deletions
  1. +1
    -1
      .github/workflows/coverage.yml
  2. +1
    -0
      CHANGELOG_PENDING.md
  3. +3
    -0
      cmd/tendermint/commands/run_node.go
  4. +7
    -0
      config/config.go
  5. +1
    -0
      config/config_test.go
  6. +6
    -0
      config/toml.go
  7. +32
    -3
      consensus/state.go
  8. +2
    -2
      docs/architecture/adr-051-double-signing-risk-reduction.md
  9. +6
    -0
      docs/tendermint-core/configuration.md
  10. +9
    -7
      docs/tendermint-core/validators.md
  11. +1
    -0
      test/README.md
  12. +64
    -0
      test/p2p/dsrr/check_peer.sh
  13. +13
    -0
      test/p2p/dsrr/test.sh
  14. +65
    -0
      test/p2p/dsrr/test_peer.sh
  15. +3
    -0
      test/p2p/test.sh

+ 1
- 1
.github/workflows/coverage.yml View File

@ -104,7 +104,7 @@ jobs:
if: "env.GIT_DIFF != ''"
- name: test & coverage report creation
run: |
cat xac.txt | xargs go test -mod=readonly -timeout 5m -race -coverprofile=coverage.txt -covermode=atomic
cat xac.txt | xargs go test -mod=readonly -timeout 10m -race -coverprofile=coverage.txt -covermode=atomic
if: "env.GIT_DIFF != ''"
- uses: codecov/codecov-action@v1.0.13
with:


+ 1
- 0
CHANGELOG_PENDING.md View File

@ -11,6 +11,7 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
## FEATURES
- [privval] \#5239 Add `chainID` to requests from client. (@marbar3778)
- [config] Add `--consensus.double_sign_check_height` flag and `DoubleSignCheckHeight` config variable. See [ADR-51](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-051-double-signing-risk-reduction.md)
## IMPROVEMENTS


+ 3
- 0
cmd/tendermint/commands/run_node.go View File

@ -37,6 +37,9 @@ func AddNodeFlags(cmd *cobra.Command) {
"genesis_hash",
[]byte{},
"Optional SHA-256 hash of the genesis file")
cmd.Flags().Int64("consensus.double_sign_check_height", config.Consensus.DoubleSignCheckHeight,
"How many blocks to look back to check existence of the node's "+
"consensus votes before joining consensus")
// abci flags
cmd.Flags().String(


+ 7
- 0
config/config.go View File

@ -830,6 +830,8 @@ type ConsensusConfig struct {
// Reactor sleep duration parameters
PeerGossipSleepDuration time.Duration `mapstructure:"peer_gossip_sleep_duration"`
PeerQueryMaj23SleepDuration time.Duration `mapstructure:"peer_query_maj23_sleep_duration"`
DoubleSignCheckHeight int64 `mapstructure:"double_sign_check_height"`
}
// DefaultConsensusConfig returns a default configuration for the consensus service
@ -848,6 +850,7 @@ func DefaultConsensusConfig() *ConsensusConfig {
CreateEmptyBlocksInterval: 0 * time.Second,
PeerGossipSleepDuration: 100 * time.Millisecond,
PeerQueryMaj23SleepDuration: 2000 * time.Millisecond,
DoubleSignCheckHeight: int64(0),
}
}
@ -864,6 +867,7 @@ func TestConsensusConfig() *ConsensusConfig {
cfg.SkipTimeoutCommit = true
cfg.PeerGossipSleepDuration = 5 * time.Millisecond
cfg.PeerQueryMaj23SleepDuration = 250 * time.Millisecond
cfg.DoubleSignCheckHeight = int64(0)
return cfg
}
@ -945,6 +949,9 @@ func (cfg *ConsensusConfig) ValidateBasic() error {
if cfg.PeerQueryMaj23SleepDuration < 0 {
return errors.New("peer_query_maj23_sleep_duration can't be negative")
}
if cfg.DoubleSignCheckHeight < 0 {
return errors.New("double_sign_check_height can't be negative")
}
return nil
}


+ 1
- 0
config/config_test.go View File

@ -164,6 +164,7 @@ func TestConsensusConfig_ValidateBasic(t *testing.T) {
"PeerGossipSleepDuration negative": {func(c *ConsensusConfig) { c.PeerGossipSleepDuration = -1 }, true},
"PeerQueryMaj23SleepDuration": {func(c *ConsensusConfig) { c.PeerQueryMaj23SleepDuration = time.Second }, false},
"PeerQueryMaj23SleepDuration negative": {func(c *ConsensusConfig) { c.PeerQueryMaj23SleepDuration = -1 }, true},
"DoubleSignCheckHeight negative": {func(c *ConsensusConfig) { c.DoubleSignCheckHeight = -1 }, true},
}
for desc, tc := range testcases {
tc := tc // appease linter


+ 6
- 0
config/toml.go View File

@ -381,6 +381,12 @@ timeout_precommit = "{{ .Consensus.TimeoutPrecommit }}"
timeout_precommit_delta = "{{ .Consensus.TimeoutPrecommitDelta }}"
timeout_commit = "{{ .Consensus.TimeoutCommit }}"
# How many blocks to look back to check existence of the node's consensus votes before joining consensus
# When non-zero, the node will panic upon restart
# if the same consensus key was used to sign {double_sign_check_height} last blocks.
# So, validators should stop the state machine, wait for some blocks, and then restart the state machine to avoid panic.
double_sign_check_height = {{ .Consensus.DoubleSignCheckHeight }}
# Make progress as soon as we have all the precommits (as if TimeoutCommit = 0)
skip_timeout_commit = {{ .Consensus.SkipTimeoutCommit }}


+ 32
- 3
consensus/state.go View File

@ -34,9 +34,10 @@ import (
// Errors
var (
ErrInvalidProposalSignature = errors.New("error invalid proposal signature")
ErrInvalidProposalPOLRound = errors.New("error invalid proposal POL round")
ErrAddingVote = errors.New("error adding vote")
ErrInvalidProposalSignature = errors.New("error invalid proposal signature")
ErrInvalidProposalPOLRound = errors.New("error invalid proposal POL round")
ErrAddingVote = errors.New("error adding vote")
ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key")
errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors")
)
@ -366,6 +367,11 @@ func (cs *State) OnStart() error {
return err
}
// Double Signing Risk Reduction
if err := cs.checkDoubleSigningRisk(cs.Height); err != nil {
return err
}
// now start the receiveRoutine
go cs.receiveRoutine(0)
@ -2114,6 +2120,29 @@ func (cs *State) updatePrivValidatorPubKey() error {
return nil
}
// look back to check existence of the node's consensus votes before joining consensus
func (cs *State) checkDoubleSigningRisk(height int64) error {
if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 {
valAddr := cs.privValidatorPubKey.Address()
doubleSignCheckHeight := cs.config.DoubleSignCheckHeight
if doubleSignCheckHeight > height {
doubleSignCheckHeight = height
}
for i := int64(1); i < doubleSignCheckHeight; i++ {
lastCommit := cs.blockStore.LoadSeenCommit(height - i)
if lastCommit != nil {
for sigIdx, s := range lastCommit.Signatures {
if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) {
cs.Logger.Info("Found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i)
return ErrSignatureFoundInPastBlocks
}
}
}
}
}
return nil
}
//---------------------------------------------------------
func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int {


+ 2
- 2
docs/architecture/adr-051-double-signing-risk-reduction.md View File

@ -28,8 +28,8 @@ We would like to suggest a double signing risk reduction method.
- Configuration
- We would like to suggest by introducing `double_sign_check_height` parameter in `config.toml` and cli, how many blocks state machine looks back to check votes
- <span v-pre>`double_sign_check_height = {{ .Consensus.DoubleSignCheckHeight }}`</span> in `config.toml`
- `tendermint node --double_sign_check_height` in cli
- State machine ignore checking procedure when `vote-check-height == 0`
- `tendermint node --consensus.double_sign_check_height` in cli
- State machine ignore checking procedure when `double_sign_check_height == 0`
## Status


+ 6
- 0
docs/tendermint-core/configuration.md View File

@ -334,6 +334,12 @@ timeout_precommit = "1s"
timeout_precommit_delta = "500ms"
timeout_commit = "1s"
# How many blocks to look back to check existence of the node's consensus votes before joining consensus
# When non-zero, the node will panic upon restart
# if the same consensus key was used to sign {double_sign_check_height} last blocks.
# So, validators should stop the state machine, wait for some blocks, and then restart the state machine to avoid panic.
double_sign_check_height = 0
# Make progress as soon as we have all the precommits (as if TimeoutCommit = 0)
skip_timeout_commit = false


+ 9
- 7
docs/tendermint-core/validators.md View File

@ -61,16 +61,18 @@ When initializing nodes there are five parameters in the `config.toml` that may
- `unconditional_peer_ids:` comma separated list of nodeID's. These nodes will be connected to no matter the limits of inbound and outbound peers. This is useful for when sentry nodes have full address books.
- `private_peer_ids:` comma separated list of nodeID's. These nodes will not be gossiped to the network. This is an important field as you do not want your validator IP gossiped to the network.
- `addr_book_strict:` boolean. By default nodes with a routable address will be considered for connection. If this setting is turned off (false), non-routable IP addresses, like addresses in a private network can be added to the address book.
- `double_sign_check_height` int64 height. How many blocks to look back to check existence of the node's consensus votes before joining consensus When non-zero, the node will panic upon restart if the same consensus key was used to sign {double_sign_check_height} last blocks. So, validators should stop the state machine, wait for some blocks, and then restart the state machine to avoid panic.
#### Validator Node Configuration
| Config Option | Setting |
| ---------------------- | -------------------------- |
| pex | false |
| persistent_peers | list of sentry nodes |
| private_peer_ids | none |
| unconditional_peer_ids | optionally sentry node IDs |
| addr_book_strict | false |
| Config Option | Setting |
| ------------------------ | -------------------------- |
| pex | false |
| persistent_peers | list of sentry nodes |
| private_peer_ids | none |
| unconditional_peer_ids | optionally sentry node IDs |
| addr_book_strict | false |
| double_sign_check_height | 10 |
The validator node should have `pex=false` so it does not gossip to the entire network. The persistent peers will be your sentry nodes. Private peers can be left empty as the validator is not trying to hide who it is communicating with. Setting unconditional peers is optional for a validator because they will not have a full address books.


+ 1
- 0
test/README.md View File

@ -19,3 +19,4 @@ and run the following tests in docker containers:
- send a tx on each node and ensure the state root is updated on all of them
- crash and restart nodes one at a time and ensure they can sync back up (via fastsync)
- crash and restart all nodes at once and ensure they can sync back up
- restart each nodes with double_sign_check_height and ensure panic if the same consensus key was used to sign in double_sign_check_height blocks

+ 64
- 0
test/p2p/dsrr/check_peer.sh View File

@ -0,0 +1,64 @@
#! /bin/bash
set -eu
set -o pipefail
IPV=$1
ID=$2
ASSERT_CASE=$3
ASSERT_NODE_UP=1
ASSERT_NODE_DOWN=0
MAX_TRY=10
###########################################
#
# Wait for peer to catchup to other peers
#
###########################################
addr=$(test/p2p/address.sh $IPV $ID 26657)
peerID=$(( $(($ID % 4)) + 1 )) # 1->2 ... 3->4 ... 4->1
peer_addr=$(test/p2p/address.sh $IPV $peerID 26657)
# get another peer's height
h1=`curl -s $peer_addr/status | jq .result.sync_info.latest_block_height | jq fromjson`
# get another peer's state
root1=`curl -s $peer_addr/status | jq .result.sync_info.latest_app_hash`
echo "Other peer is on height $h1 with state $root1"
echo "Waiting for peer $ID to catch up"
# wait for it to sync to past its previous height
set +e
set +o pipefail
h2="0"
COUNT=0
while [[ "$h2" -lt "$(($h1+1))" ]]; do
sleep 1
h2=`curl -s $addr/status --connect-timeout 1 | jq .result.sync_info.latest_block_height | jq fromjson`
COUNT=$((COUNT+1))
echo "... $h2, try $COUNT"
if [ "$COUNT" -ge "$MAX_TRY" ]; then
if [ $ASSERT_CASE -eq $ASSERT_NODE_DOWN ]; then
echo "double sign risk reduction operates normally as expected"
fi
if [ $ASSERT_CASE -eq $ASSERT_NODE_UP ]; then
echo "double sign risk reduction fail"
exit 1
fi
break
fi
done
if [ $ASSERT_CASE -eq $ASSERT_NODE_UP ]; then
# check the app hash
root2=`curl -s $addr/status | jq .result.sync_info.latest_app_hash`
if [[ "$root1" != "$root2" ]]; then
echo "App hash after restart does not match. Got $root2; expected $root1"
exit 1
fi
echo "... double sign risk reduction test passed"
fi

+ 13
- 0
test/p2p/dsrr/test.sh View File

@ -0,0 +1,13 @@
#! /bin/bash
set -eu
DOCKER_IMAGE=$1
NETWORK_NAME=$2
IPV=$3
N=$4
PROXY_APP=$5
# run it on each of them
for i in `seq 1 $N`; do
bash test/p2p/dsrr/test_peer.sh $DOCKER_IMAGE $NETWORK_NAME $IPV $i $N $PROXY_APP
done

+ 65
- 0
test/p2p/dsrr/test_peer.sh View File

@ -0,0 +1,65 @@
#! /bin/bash
set -eu
set -o pipefail
DOCKER_IMAGE=$1
NETWORK_NAME=$2
IPV=$3
ID=$4
N=$5
PROXY_APP=$6
ASSERT_NODE_UP=1
ASSERT_NODE_DOWN=0
###########################s####################################
# this runs on each peer:
# kill peer
# bring it back online with double_sign_check_height 10
# wait node is not run by double sign risk reduction
#
# kill peer
# bring it back online with double_sign_check_height 1
# pass double sign risk reduction, wait for it to sync and check the app hash
#
# kill peer
# bring it back online with double_sign_check_height 0
# wait for it to sync and check the app hash
###############################################################
echo "Testing double sign risk reduction on node $ID"
# kill peer
set +e
docker rm -vf local_testnet_$ID
set -e
PERSISTENT_PEERS="$(test/p2p/address.sh $IPV 1 26656 $DOCKER_IMAGE)"
for j in `seq 2 $N`; do
PERSISTENT_PEERS="$PERSISTENT_PEERS,$(test/p2p/address.sh $IPV $j 26656 $DOCKER_IMAGE)"
done
# bring it back online with double_sign_check_height 10
# wait node is not run by double sign risk reduction
DSCH=10
bash test/p2p/peer.sh $DOCKER_IMAGE $NETWORK_NAME $IPV $ID $PROXY_APP "--p2p.persistent_peers $PERSISTENT_PEERS --p2p.pex --rpc.unsafe --consensus.double_sign_check_height $DSCH"
bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME $IPV fs_$ID "test/p2p/dsrr/check_peer.sh $IPV $ID $ASSERT_NODE_DOWN"
docker stop local_testnet_$ID
docker rm local_testnet_$ID
# bring it back online with double_sign_check_height 1
# pass double sign risk reduction, wait for it to sync and check the app hash
DSCH=1
bash test/p2p/peer.sh $DOCKER_IMAGE $NETWORK_NAME $IPV $ID $PROXY_APP "--p2p.persistent_peers $PERSISTENT_PEERS --p2p.pex --rpc.unsafe --consensus.double_sign_check_height $DSCH"
bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME $IPV fs_$ID "test/p2p/dsrr/check_peer.sh $IPV $ID $ASSERT_NODE_UP"
docker stop local_testnet_$ID
docker rm local_testnet_$ID
DSCH=0
# bring it back online with double_sign_check_height 0
# double sign risk reduction is not activated, wait for it to sync and check the app hash
bash test/p2p/peer.sh $DOCKER_IMAGE $NETWORK_NAME $IPV $ID $PROXY_APP "--p2p.persistent_peers $PERSISTENT_PEERS --p2p.pex --rpc.unsafe --consensus.double_sign_check_height $DSCH"
bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME $IPV fs_$ID "test/p2p/dsrr/check_peer.sh $IPV $ID $ASSERT_NODE_UP"
echo ""
echo "PASS"
echo ""

+ 3
- 0
test/p2p/test.sh View File

@ -35,6 +35,9 @@ bash test/p2p/client.sh "$DOCKER_IMAGE" "$NETWORK_NAME" "$IPV" ab "test/p2p/atom
# for each node, kill it and readd via fast sync
bash test/p2p/fast_sync/test.sh "$DOCKER_IMAGE" "$NETWORK_NAME" "$IPV" "$N" "$PROXY_APP"
# test double sign risk reduction for each node
bash test/p2p/dsrr/test.sh "$DOCKER_IMAGE" "$NETWORK_NAME" "$IPV" "$N" "$PROXY_APP"
# test killing all peers 3 times
bash test/p2p/kill_all/test.sh "$DOCKER_IMAGE" "$NETWORK_NAME" "$IPV" "$N" 3


Loading…
Cancel
Save