From 8df32cd5408e59248579d8b5b7db785b5682d06e Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 6 Dec 2016 19:54:10 -0500 Subject: [PATCH 01/10] test: increase proposal timeout --- consensus/byzantine_test.go | 1 - consensus/common_test.go | 18 ++++++++++++++++++ consensus/reactor_test.go | 15 --------------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/consensus/byzantine_test.go b/consensus/byzantine_test.go index f049fa3ef..e5233806b 100644 --- a/consensus/byzantine_test.go +++ b/consensus/byzantine_test.go @@ -29,7 +29,6 @@ func init() { // Byzantine validator refuses to prevote. // Heal partition and ensure A sees the commit func TestByzantine(t *testing.T) { - resetConfigTimeouts() N := 4 css := randConsensusNet(N) diff --git a/consensus/common_test.go b/consensus/common_test.go index 7f15ab5fb..9d33a5c80 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -12,6 +12,7 @@ import ( . "github.com/tendermint/go-common" cfg "github.com/tendermint/go-config" dbm "github.com/tendermint/go-db" + "github.com/tendermint/go-logger" "github.com/tendermint/go-p2p" bc "github.com/tendermint/tendermint/blockchain" "github.com/tendermint/tendermint/config/tendermint_test" @@ -264,6 +265,7 @@ func randConsensusNet(nValidators int) []*ConsensusState { state := sm.MakeGenesisState(db, genDoc) state.Save() thisConfig := tendermint_test.ResetConfig(Fmt("consensus_reactor_test_%d", i)) + resetConfigTimeouts(thisConfig) EnsureDir(thisConfig.GetString("cs_wal_dir"), 0700) // dir for wal css[i] = newConsensusStateWithConfig(thisConfig, state, privVals[i], counter.NewCounterApplication(true)) } @@ -279,6 +281,7 @@ func randConsensusNetWithPeers(nValidators int, nPeers int) []*ConsensusState { state := sm.MakeGenesisState(db, genDoc) state.Save() thisConfig := tendermint_test.ResetConfig(Fmt("consensus_reactor_test_%d", i)) + resetConfigTimeouts(thisConfig) EnsureDir(thisConfig.GetString("cs_wal_dir"), 0700) // dir for wal var privVal *types.PrivValidator if i < nValidators { @@ -367,3 +370,18 @@ func getSwitchIndex(switches []*p2p.Switch, peer *p2p.Peer) int { panic("didnt find peer in switches") return -1 } + +// so we dont violate synchrony assumptions +// TODO: make tests more robust to this instead (handle round changes) +// XXX: especially a problem when running the race detector +func resetConfigTimeouts(config cfg.Config) { + logger.SetLogLevel("info") + //config.Set("log_level", "notice") + config.Set("timeout_propose", 10000) // TODO + // config.Set("timeout_propose_delta", 500) + // config.Set("timeout_prevote", 1000) + // config.Set("timeout_prevote_delta", 500) + // config.Set("timeout_precommit", 1000) + // config.Set("timeout_precommit_delta", 500) + config.Set("timeout_commit", 1000) +} diff --git a/consensus/reactor_test.go b/consensus/reactor_test.go index 504bd97f3..de0eaa18b 100644 --- a/consensus/reactor_test.go +++ b/consensus/reactor_test.go @@ -9,7 +9,6 @@ import ( "github.com/tendermint/tendermint/config/tendermint_test" "github.com/tendermint/go-events" - "github.com/tendermint/go-logger" "github.com/tendermint/go-p2p" "github.com/tendermint/tendermint/types" "github.com/tendermint/tmsp/example/dummy" @@ -19,24 +18,11 @@ func init() { config = tendermint_test.ResetConfig("consensus_reactor_test") } -func resetConfigTimeouts() { - logger.SetLogLevel("info") - //config.Set("log_level", "notice") - config.Set("timeout_propose", 2000) - // config.Set("timeout_propose_delta", 500) - // config.Set("timeout_prevote", 1000) - // config.Set("timeout_prevote_delta", 500) - // config.Set("timeout_precommit", 1000) - // config.Set("timeout_precommit_delta", 500) - config.Set("timeout_commit", 1000) -} - //---------------------------------------------- // in-process testnets // Ensure a testnet makes blocks func TestReactor(t *testing.T) { - resetConfigTimeouts() N := 4 css := randConsensusNet(N) reactors := make([]*ConsensusReactor, N) @@ -70,7 +56,6 @@ func TestReactor(t *testing.T) { // ensure we can make blocks despite cycling a validator set func TestValidatorSetChanges(t *testing.T) { - resetConfigTimeouts() nPeers := 8 nVals := 4 css := randConsensusNetWithPeers(nVals, nPeers) From 69ef1da58cdfd67cd9e3b4cee3e630ee7772295b Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 6 Dec 2016 20:53:02 -0500 Subject: [PATCH 02/10] types: copy vote set bit array --- types/vote_set.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/types/vote_set.go b/types/vote_set.go index e7598532e..10b1aa094 100644 --- a/types/vote_set.go +++ b/types/vote_set.go @@ -325,7 +325,7 @@ func (voteSet *VoteSet) BitArrayByBlockID(blockID BlockID) *BitArray { defer voteSet.mtx.Unlock() votesByBlock, ok := voteSet.votesByBlock[blockID.Key()] if ok { - return votesByBlock.bitArray + return votesByBlock.bitArray.Copy() } return nil } From 2abcde03ada676baa85107f51fbe26e96fef32f9 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 6 Dec 2016 22:08:05 -0500 Subject: [PATCH 03/10] tests: cleanup and fix scripts --- test/app/counter_test.sh | 70 +++++++++++++++++--------------------- test/app/test.sh | 10 +++--- test/p2p/fast_sync/test.sh | 54 +++++++++++------------------ test/p2p/test.sh | 20 +---------- test/persist/test.sh | 2 ++ 5 files changed, 59 insertions(+), 97 deletions(-) diff --git a/test/app/counter_test.sh b/test/app/counter_test.sh index 80b9b27b4..24f4fb618 100644 --- a/test/app/counter_test.sh +++ b/test/app/counter_test.sh @@ -1,4 +1,9 @@ #! /bin/bash + +if [[ "$GRPC_BROADCAST_TX" == "" ]]; then + GRPC_BROADCAST_TX="" +fi + set -u ##################### @@ -25,34 +30,40 @@ function sendTx() { TX=$1 if [[ "$GRPC_BROADCAST_TX" == "" ]]; then RESPONSE=`curl -s localhost:46657/broadcast_tx_commit?tx=\"$TX\"` - CODE=`echo $RESPONSE | jq .result[1].code` ERROR=`echo $RESPONSE | jq .error` ERROR=$(echo "$ERROR" | tr -d '"') # remove surrounding quotes + + RESPONSE=`echo $RESPONSE | jq .result[1]` else if [ ! -f grpc_client ]; then go build -o grpc_client grpc_client.go fi RESPONSE=`./grpc_client $TX` - echo $RESPONSE | jq . &> /dev/null - IS_JSON=$? - if [[ "$IS_JSON" != "0" ]]; then - ERROR="$RESPONSE" - else - ERROR="" # reset - fi - APPEND_TX_RESPONSE=`echo $RESPONSE | jq .append_tx` - APPEND_TX_CODE=`getCode "$APPEND_TX_RESPONSE"` - CHECK_TX_RESPONSE=`echo $RESPONSE | jq .check_tx` - CHECK_TX_CODE=`getCode "$CHECK_TX_RESPONSE"` - - echo "-------" - echo "TX $TX" - echo "RESPONSE $RESPONSE" - echo "CHECK_TX_RESPONSE $CHECK_TX_RESPONSE" - echo "APPEND_TX_RESPONSE $APPEND_TX_RESPONSE" - echo "CHECK_TX_CODE $CHECK_TX_CODE" - echo "APPEND_TX_CODE $APPEND_TX_CODE" - echo "----" + ERROR="" + fi + + echo "RESPONSE" + echo $RESPONSE + + echo $RESPONSE | jq . &> /dev/null + IS_JSON=$? + if [[ "$IS_JSON" != "0" ]]; then + ERROR="$RESPONSE" + fi + APPEND_TX_RESPONSE=`echo $RESPONSE | jq .append_tx` + APPEND_TX_CODE=`getCode "$APPEND_TX_RESPONSE"` + CHECK_TX_RESPONSE=`echo $RESPONSE | jq .check_tx` + CHECK_TX_CODE=`getCode "$CHECK_TX_RESPONSE"` + + echo "-------" + echo "TX $TX" + echo "RESPONSE $RESPONSE" + echo "ERROR $ERROR" + echo "----" + + if [[ "$ERROR" != "" ]]; then + echo "Unexpected error sending tx ($TX): $ERROR" + exit 1 fi } @@ -66,10 +77,6 @@ if [[ $APPEND_TX_CODE != 0 ]]; then exit 1 fi -if [[ "$GRPC_BROADCAST_TX" == "" && "$ERROR" != "" ]]; then - echo "Unexpected error. Tx $TX should have been included in a block. $ERROR" - exit 1 -fi echo "... sending tx. expect error" @@ -80,11 +87,6 @@ if [[ "$CHECK_TX_CODE" == 0 ]]; then echo "Got zero exit code for $TX. Expected tx to be rejected by mempool. $RESPONSE" exit 1 fi -if [[ "$GRPC_BROADCAST_TX" == "" && "$ERROR" == "" ]]; then - echo "Expected to get an error - tx $TX should have been rejected from mempool" - echo "$RESPONSE" - exit 1 -fi echo "... sending tx. expect no error" @@ -96,10 +98,6 @@ if [[ $APPEND_TX_CODE != 0 ]]; then echo "Got non-zero exit code for $TX. $RESPONSE" exit 1 fi -if [[ "$GRPC_BROADCAST_TX" == "" && "$ERROR" != "" ]]; then - echo "Unexpected error. Tx $TX should have been accepted in block. $ERROR" - exit 1 -fi echo "... sending tx. expect no error, but invalid" @@ -114,9 +112,5 @@ if [[ $APPEND_TX_CODE == 0 ]]; then echo "Got zero exit code for $TX. Should have been bad nonce. $RESPONSE" exit 1 fi -if [[ "$GRPC_BROADCAST_TX" == "" && "$ERROR" != "" ]]; then - echo "Unexpected error. Tx $TX should have been included in a block. $ERROR" - exit 1 -fi echo "Passed Test: $TESTNAME" diff --git a/test/app/test.sh b/test/app/test.sh index 4830c2b15..bcc55c937 100644 --- a/test/app/test.sh +++ b/test/app/test.sh @@ -13,7 +13,7 @@ export TMROOT=$HOME/.tendermint_app function dummy_over_socket(){ rm -rf $TMROOT tendermint init - echo "Starting dummy and tendermint" + echo "Starting dummy_over_socket" dummy > /dev/null & pid_dummy=$! tendermint node > tendermint.log & @@ -30,7 +30,7 @@ function dummy_over_socket(){ function dummy_over_socket_reorder(){ rm -rf $TMROOT tendermint init - echo "Starting tendermint and dummy" + echo "Starting dummy_over_socket_reorder (ie. start tendermint first)" tendermint node > tendermint.log & pid_tendermint=$! sleep 2 @@ -48,7 +48,7 @@ function dummy_over_socket_reorder(){ function counter_over_socket() { rm -rf $TMROOT tendermint init - echo "Starting counter and tendermint" + echo "Starting counter_over_socket" counter --serial > /dev/null & pid_counter=$! tendermint node > tendermint.log & @@ -64,7 +64,7 @@ function counter_over_socket() { function counter_over_grpc() { rm -rf $TMROOT tendermint init - echo "Starting counter and tendermint" + echo "Starting counter_over_grpc" counter --serial --tmsp grpc > /dev/null & pid_counter=$! tendermint node --tmsp grpc > tendermint.log & @@ -80,7 +80,7 @@ function counter_over_grpc() { function counter_over_grpc_grpc() { rm -rf $TMROOT tendermint init - echo "Starting counter and tendermint" + echo "Starting counter_over_grpc_grpc (ie. with grpc broadcast_tx)" counter --serial --tmsp grpc > /dev/null & pid_counter=$! sleep 1 diff --git a/test/p2p/fast_sync/test.sh b/test/p2p/fast_sync/test.sh index 7a5453f00..43c5d041a 100644 --- a/test/p2p/fast_sync/test.sh +++ b/test/p2p/fast_sync/test.sh @@ -1,44 +1,28 @@ #! /bin/bash set -eu -set -o pipefail -############################################################### -# for each peer: -# kill peer -# bring it back online via fast sync -# check app hash -############################################################### +DOCKER_IMAGE=$1 +NETWORK_NAME=$2 +COUNT=$3 +N=$4 -ID=$1 +echo "Testing fasysync on node $COUNT" -addr=$(test/p2p/ip.sh $ID):46657 -peerID=$(( $(($ID % 4)) + 1 )) # 1->2 ... 3->4 ... 4->1 -peer_addr=$(test/p2p/ip.sh $peerID):46657 +# kill peer +set +e # circle sigh :( +docker rm -vf local_testnet_$COUNT +set -e -# get another peer's height -h1=`curl -s $peer_addr/status | jq .result[1].latest_block_height` - -# get another peer's state -root1=`curl -s $peer_addr/status | jq .result[1].latest_app_hash` - -echo "Other peer is on height $h1 with state $root1" -echo "Waiting for peer $ID to catch up" - -# wait for it to sync to past its previous height -set +e -set +o pipefail -h2="0" -while [[ "$h2" -lt "$(($h1+3))" ]]; do - sleep 1 - h2=`curl -s $addr/status | jq .result[1].latest_block_height` - echo "... $h2" +# restart peer - should have an empty blockchain +SEEDS="$(test/p2p/ip.sh 1):46656" +for j in `seq 2 $N`; do + SEEDS="$SEEDS,$(test/p2p/ip.sh $j):46656" done +bash test/p2p/peer.sh $DOCKER_IMAGE $NETWORK_NAME $COUNT $SEEDS + +bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME fs_$COUNT "test/p2p/fast_sync/restart_peer.sh $COUNT" -# check the app hash -root2=`curl -s $addr/status | jq .result[1].latest_app_hash` +echo "" +echo "PASS" +echo "" -if [[ "$root1" != "$root2" ]]; then - echo "App hash after fast sync does not match. Got $root2; expected $root1" - exit 1 -fi -echo "... fast sync successful" diff --git a/test/p2p/test.sh b/test/p2p/test.sh index 4d021a4ee..9ca50737c 100644 --- a/test/p2p/test.sh +++ b/test/p2p/test.sh @@ -16,23 +16,5 @@ bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME ab test/p2p/atomic_broadcast # run it on each of them N=4 for i in `seq 1 $N`; do - echo "Testing fasysync on node $i" - - # kill peer - set +e # circle sigh :( - docker rm -vf local_testnet_$i - set -e - - # restart peer - should have an empty blockchain - SEEDS="$(test/p2p/ip.sh 1):46656" - for j in `seq 2 $N`; do - SEEDS="$SEEDS,$(test/p2p/ip.sh $j):46656" - done - bash test/p2p/peer.sh $DOCKER_IMAGE $NETWORK_NAME $i $SEEDS - - bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME fs_$i "test/p2p/fast_sync/test.sh $i" + bash test/p2p/fast_sync/test.sh $DOCKER_IMAGE $NETWORK_NAME $i $N done -echo "" -echo "PASS" -echo "" - diff --git a/test/persist/test.sh b/test/persist/test.sh index 5c1e12411..1a94a0938 100644 --- a/test/persist/test.sh +++ b/test/persist/test.sh @@ -37,6 +37,7 @@ function send_txs(){ start_procs 1 send_txs kill_procs + start_procs 2 # wait for node to handshake and make a new block @@ -64,5 +65,6 @@ while [ "$h2" == "$h1" ]; do done kill_procs +sleep 2 echo "Passed Test: Persistence" From 6be5bda8c9c5df2429297be14e1925db05947224 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 6 Dec 2016 23:01:25 -0500 Subject: [PATCH 04/10] types: copy commit bit array --- types/block.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/types/block.go b/types/block.go index d971b90b6..bcabe5b6b 100644 --- a/types/block.go +++ b/types/block.go @@ -270,7 +270,7 @@ func (commit *Commit) BitArray() *BitArray { commit.bitArray.SetIndex(i, precommit != nil) } } - return commit.bitArray + return commit.bitArray.Copy() } func (commit *Commit) GetByIndex(index int) *Vote { From 242571173483c0a08134eed9ea139946ee8fb5b0 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 6 Dec 2016 23:01:55 -0500 Subject: [PATCH 05/10] blockchain: use ApplyBlock --- blockchain/reactor.go | 19 +++++++------------ state/execution.go | 10 +++++----- state/execution_test.go | 2 +- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/blockchain/reactor.go b/blockchain/reactor.go index f7c7586e9..bfa671d02 100644 --- a/blockchain/reactor.go +++ b/blockchain/reactor.go @@ -235,23 +235,18 @@ FOR_LOOP: break SYNC_LOOP } else { bcR.pool.PopRequest() - // TODO: use ApplyBlock instead of Exec/Commit/SetAppHash/Save + + bcR.store.SaveBlock(first, firstParts, second.LastCommit) + // TODO: should we be firing events? need to fire NewBlock events manually ... - err := bcR.state.ExecBlock(bcR.evsw, bcR.proxyAppConn, first, firstPartsHeader) - if err != nil { - // TODO This is bad, are we zombie? - PanicQ(Fmt("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err)) - } // NOTE: we could improve performance if we // didn't make the app commit to disk every block // ... but we would need a way to get the hash without it persisting - res := bcR.proxyAppConn.CommitSync() - if res.IsErr() { - // TODO Handle gracefully. - PanicQ(Fmt("Failed to commit block at application: %v", res)) + err := bcR.state.ApplyBlock(bcR.evsw, bcR.proxyAppConn, first, firstPartsHeader, sm.MockMempool{}) + if err != nil { + // TODO This is bad, are we zombie? + PanicQ(Fmt("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err)) } - bcR.store.SaveBlock(first, firstParts, second.LastCommit) - bcR.state.AppHash = res.Data bcR.state.Save() } } diff --git a/state/execution.go b/state/execution.go index 016fe50c3..cec4849ab 100644 --- a/state/execution.go +++ b/state/execution.go @@ -280,12 +280,12 @@ type Mempool interface { Update(height int, txs []types.Tx) } -type mockMempool struct { +type MockMempool struct { } -func (m mockMempool) Lock() {} -func (m mockMempool) Unlock() {} -func (m mockMempool) Update(height int, txs []types.Tx) {} +func (m MockMempool) Lock() {} +func (m MockMempool) Unlock() {} +func (m MockMempool) Update(height int, txs []types.Tx) {} //---------------------------------------------------------------- // Handshake with app to sync to latest state of core by replaying blocks @@ -386,7 +386,7 @@ func (h *Handshaker) ReplayBlocks(appHash []byte, appBlockHeight int, appConnCon var eventCache types.Fireable // nil // replay the block against the actual tendermint state - return h.state.ApplyBlock(eventCache, appConnConsensus, block, blockMeta.PartsHeader, mockMempool{}) + return h.state.ApplyBlock(eventCache, appConnConsensus, block, blockMeta.PartsHeader, MockMempool{}) } else { // either we're caught up or there's blocks to replay diff --git a/state/execution_test.go b/state/execution_test.go index e0527a42e..cbaab0997 100644 --- a/state/execution_test.go +++ b/state/execution_test.go @@ -20,7 +20,7 @@ var ( privKey = crypto.GenPrivKeyEd25519FromSecret([]byte("handshake_test")) chainID = "handshake_chain" nBlocks = 5 - mempool = mockMempool{} + mempool = MockMempool{} testPartSize = 65536 ) From 73502fab0dbe523ea0611132db016a4ce4997c0f Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 6 Dec 2016 23:16:41 -0500 Subject: [PATCH 06/10] shame: forgot a file --- test/p2p/fast_sync/restart_peer.sh | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 test/p2p/fast_sync/restart_peer.sh diff --git a/test/p2p/fast_sync/restart_peer.sh b/test/p2p/fast_sync/restart_peer.sh new file mode 100644 index 000000000..7a5453f00 --- /dev/null +++ b/test/p2p/fast_sync/restart_peer.sh @@ -0,0 +1,44 @@ +#! /bin/bash +set -eu +set -o pipefail + +############################################################### +# for each peer: +# kill peer +# bring it back online via fast sync +# check app hash +############################################################### + +ID=$1 + +addr=$(test/p2p/ip.sh $ID):46657 +peerID=$(( $(($ID % 4)) + 1 )) # 1->2 ... 3->4 ... 4->1 +peer_addr=$(test/p2p/ip.sh $peerID):46657 + +# get another peer's height +h1=`curl -s $peer_addr/status | jq .result[1].latest_block_height` + +# get another peer's state +root1=`curl -s $peer_addr/status | jq .result[1].latest_app_hash` + +echo "Other peer is on height $h1 with state $root1" +echo "Waiting for peer $ID to catch up" + +# wait for it to sync to past its previous height +set +e +set +o pipefail +h2="0" +while [[ "$h2" -lt "$(($h1+3))" ]]; do + sleep 1 + h2=`curl -s $addr/status | jq .result[1].latest_block_height` + echo "... $h2" +done + +# check the app hash +root2=`curl -s $addr/status | jq .result[1].latest_app_hash` + +if [[ "$root1" != "$root2" ]]; then + echo "App hash after fast sync does not match. Got $root2; expected $root1" + exit 1 +fi +echo "... fast sync successful" From d800a51da4bc2bda8e2c133075d6f7e4f67f5846 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Wed, 7 Dec 2016 20:13:52 -0500 Subject: [PATCH 07/10] test: crank it to eleventy --- consensus/common_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/consensus/common_test.go b/consensus/common_test.go index 9d33a5c80..31061fe74 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -373,11 +373,11 @@ func getSwitchIndex(switches []*p2p.Switch, peer *p2p.Peer) int { // so we dont violate synchrony assumptions // TODO: make tests more robust to this instead (handle round changes) -// XXX: especially a problem when running the race detector +// XXX: especially a problem when running the race detector on circle func resetConfigTimeouts(config cfg.Config) { logger.SetLogLevel("info") //config.Set("log_level", "notice") - config.Set("timeout_propose", 10000) // TODO + config.Set("timeout_propose", 110000) // TODO: crank it to eleventy // config.Set("timeout_propose_delta", 500) // config.Set("timeout_prevote", 1000) // config.Set("timeout_prevote_delta", 500) From bcd8712ec394cc8cc37dff494f6a12ec559ecf0e Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Mon, 12 Dec 2016 16:00:21 -0500 Subject: [PATCH 08/10] test: more cleanup on p2p --- test/p2p/atomic_broadcast/test.sh | 41 ++------------ test/p2p/basic/test.sh | 53 +++++++++++++++++++ .../{restart_peer.sh => check_peer.sh} | 13 +++-- test/p2p/fast_sync/test.sh | 23 ++------ test/p2p/fast_sync/test_peer.sh | 37 +++++++++++++ test/p2p/local_testnet.sh | 2 +- test/p2p/test.sh | 21 ++++---- 7 files changed, 119 insertions(+), 71 deletions(-) create mode 100644 test/p2p/basic/test.sh rename test/p2p/fast_sync/{restart_peer.sh => check_peer.sh} (81%) create mode 100644 test/p2p/fast_sync/test_peer.sh diff --git a/test/p2p/atomic_broadcast/test.sh b/test/p2p/atomic_broadcast/test.sh index 3b78166d6..3eef2d5eb 100644 --- a/test/p2p/atomic_broadcast/test.sh +++ b/test/p2p/atomic_broadcast/test.sh @@ -1,52 +1,21 @@ #! /bin/bash +set -u + +N=$1 ################################################################### -# wait for all peers to come online +# assumes peers are already synced up +# test sending txs # for each peer: -# wait to have 3 peers -# wait to be at height > 1 # send a tx, wait for commit # assert app hash on every peer reflects the post tx state ################################################################### -N=4 - -# wait for everyone to come online -echo "Waiting for nodes to come online" -for i in `seq 1 $N`; do - addr=$(test/p2p/ip.sh $i):46657 - curl -s $addr/status > /dev/null - ERR=$? - while [ "$ERR" != 0 ]; do - sleep 1 - curl -s $addr/status > /dev/null - ERR=$? - done - echo "... node $i is up" -done - echo "" # run the test on each of them for i in `seq 1 $N`; do addr=$(test/p2p/ip.sh $i):46657 - # - assert everyone has 3 other peers - N_PEERS=`curl -s $addr/net_info | jq '.result[1].peers | length'` - while [ "$N_PEERS" != 3 ]; do - echo "Waiting for node $i to connect to all peers ..." - sleep 1 - N_PEERS=`curl -s $addr/net_info | jq '.result[1].peers | length'` - done - - # - assert block height is greater than 1 - BLOCK_HEIGHT=`curl -s $addr/status | jq .result[1].latest_block_height` - while [ "$BLOCK_HEIGHT" -le 1 ]; do - echo "Waiting for node $i to commit a block ..." - sleep 1 - BLOCK_HEIGHT=`curl -s $addr/status | jq .result[1].latest_block_height` - done - echo "Node $i is connected to all peers and at block $BLOCK_HEIGHT" - # current state HASH1=`curl -s $addr/status | jq .result[1].latest_app_hash` diff --git a/test/p2p/basic/test.sh b/test/p2p/basic/test.sh new file mode 100644 index 000000000..3399515a8 --- /dev/null +++ b/test/p2p/basic/test.sh @@ -0,0 +1,53 @@ +#! /bin/bash +set -u + +N=$1 + +################################################################### +# wait for all peers to come online +# for each peer: +# wait to have N-1 peers +# wait to be at height > 1 +################################################################### + +# wait for everyone to come online +echo "Waiting for nodes to come online" +for i in `seq 1 $N`; do + addr=$(test/p2p/ip.sh $i):46657 + curl -s $addr/status > /dev/null + ERR=$? + while [ "$ERR" != 0 ]; do + sleep 1 + curl -s $addr/status > /dev/null + ERR=$? + done + echo "... node $i is up" +done + +echo "" +# wait for each of them to sync up +for i in `seq 1 $N`; do + addr=$(test/p2p/ip.sh $i):46657 + N_1=$(($N - 1)) + + # - assert everyone has N-1 other peers + N_PEERS=`curl -s $addr/net_info | jq '.result[1].peers | length'` + while [ "$N_PEERS" != $N_1 ]; do + echo "Waiting for node $i to connect to all peers ..." + sleep 1 + N_PEERS=`curl -s $addr/net_info | jq '.result[1].peers | length'` + done + + # - assert block height is greater than 1 + BLOCK_HEIGHT=`curl -s $addr/status | jq .result[1].latest_block_height` + while [ "$BLOCK_HEIGHT" -le 1 ]; do + echo "Waiting for node $i to commit a block ..." + sleep 1 + BLOCK_HEIGHT=`curl -s $addr/status | jq .result[1].latest_block_height` + done + echo "Node $i is connected to all peers and at block $BLOCK_HEIGHT" +done + +echo "" +echo "PASS" +echo "" diff --git a/test/p2p/fast_sync/restart_peer.sh b/test/p2p/fast_sync/check_peer.sh similarity index 81% rename from test/p2p/fast_sync/restart_peer.sh rename to test/p2p/fast_sync/check_peer.sh index 7a5453f00..c459277d2 100644 --- a/test/p2p/fast_sync/restart_peer.sh +++ b/test/p2p/fast_sync/check_peer.sh @@ -2,15 +2,14 @@ set -eu set -o pipefail -############################################################### -# for each peer: -# kill peer -# bring it back online via fast sync -# check app hash -############################################################### - ID=$1 +########################################### +# +# Wait for peer to catchup to other peers +# +########################################### + addr=$(test/p2p/ip.sh $ID):46657 peerID=$(( $(($ID % 4)) + 1 )) # 1->2 ... 3->4 ... 4->1 peer_addr=$(test/p2p/ip.sh $peerID):46657 diff --git a/test/p2p/fast_sync/test.sh b/test/p2p/fast_sync/test.sh index 43c5d041a..b4ac90f99 100644 --- a/test/p2p/fast_sync/test.sh +++ b/test/p2p/fast_sync/test.sh @@ -3,26 +3,13 @@ set -eu DOCKER_IMAGE=$1 NETWORK_NAME=$2 -COUNT=$3 -N=$4 +N=$3 -echo "Testing fasysync on node $COUNT" +cd $GOPATH/src/github.com/tendermint/tendermint -# kill peer -set +e # circle sigh :( -docker rm -vf local_testnet_$COUNT -set -e - -# restart peer - should have an empty blockchain -SEEDS="$(test/p2p/ip.sh 1):46656" -for j in `seq 2 $N`; do - SEEDS="$SEEDS,$(test/p2p/ip.sh $j):46656" +# run it on each of them +for i in `seq 1 $N`; do + bash test/p2p/fast_sync/test_peer.sh $DOCKER_IMAGE $NETWORK_NAME $i $N done -bash test/p2p/peer.sh $DOCKER_IMAGE $NETWORK_NAME $COUNT $SEEDS - -bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME fs_$COUNT "test/p2p/fast_sync/restart_peer.sh $COUNT" -echo "" -echo "PASS" -echo "" diff --git a/test/p2p/fast_sync/test_peer.sh b/test/p2p/fast_sync/test_peer.sh new file mode 100644 index 000000000..d3c101293 --- /dev/null +++ b/test/p2p/fast_sync/test_peer.sh @@ -0,0 +1,37 @@ +#! /bin/bash +set -eu + +DOCKER_IMAGE=$1 +NETWORK_NAME=$2 +COUNT=$3 +N=$4 + +############################################################### +# this runs on each peer: +# kill peer +# bring it back online via fast sync +# wait for it to sync and check the app hash +############################################################### + + +echo "Testing fasysync on node $COUNT" + +# kill peer +set +e # circle sigh :( +docker rm -vf local_testnet_$COUNT +set -e + +# restart peer - should have an empty blockchain +SEEDS="$(test/p2p/ip.sh 1):46656" +for j in `seq 2 $N`; do + SEEDS="$SEEDS,$(test/p2p/ip.sh $j):46656" +done +bash test/p2p/peer.sh $DOCKER_IMAGE $NETWORK_NAME $COUNT $SEEDS + +# wait for peer to sync and check the app hash +bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME fs_$COUNT "test/p2p/fast_sync/check_peer.sh $COUNT" + +echo "" +echo "PASS" +echo "" + diff --git a/test/p2p/local_testnet.sh b/test/p2p/local_testnet.sh index 9380adfd4..50297d62d 100644 --- a/test/p2p/local_testnet.sh +++ b/test/p2p/local_testnet.sh @@ -3,13 +3,13 @@ set -eu DOCKER_IMAGE=$1 NETWORK_NAME=$2 +N=$3 cd $GOPATH/src/github.com/tendermint/tendermint # create docker network docker network create --driver bridge --subnet 172.57.0.0/16 $NETWORK_NAME -N=4 seeds="$(test/p2p/ip.sh 1):46656" for i in `seq 2 $N`; do seeds="$seeds,$(test/p2p/ip.sh $i):46656" diff --git a/test/p2p/test.sh b/test/p2p/test.sh index 9ca50737c..7a64d464a 100644 --- a/test/p2p/test.sh +++ b/test/p2p/test.sh @@ -3,18 +3,21 @@ set -eu DOCKER_IMAGE=$1 NETWORK_NAME=local_testnet +N=4 cd $GOPATH/src/github.com/tendermint/tendermint # start the testnet on a local network -bash test/p2p/local_testnet.sh $DOCKER_IMAGE $NETWORK_NAME +bash test/p2p/local_testnet.sh $DOCKER_IMAGE $NETWORK_NAME $N -# test atomic broadcast -bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME ab test/p2p/atomic_broadcast/test.sh +# test basic connectivity and consensus +# start client container and check the num peers and height for all nodes +bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME basic "test/p2p/basic/test.sh $N" -# test fast sync (from current state of network) -# run it on each of them -N=4 -for i in `seq 1 $N`; do - bash test/p2p/fast_sync/test.sh $DOCKER_IMAGE $NETWORK_NAME $i $N -done +# test atomic broadcast: +# start client container and test sending a tx to each node +bash test/p2p/client.sh $DOCKER_IMAGE $NETWORK_NAME ab "test/p2p/atomic_broadcast/test.sh $N" + +# test fast sync (from current state of network): +# for each node, kill it and readd via fast sync +bash test/p2p/fast_sync/test.sh $DOCKER_IMAGE $NETWORK_NAME $N From de6bba4609046a496dfb151051430fc948b0fda8 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 17 Dec 2016 13:24:54 -0500 Subject: [PATCH 09/10] test: randConsensusNet takes more args --- consensus/byzantine_test.go | 2 +- consensus/common_test.go | 20 +++++++------------- consensus/reactor_test.go | 10 ++++++---- types/block.go | 2 +- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/consensus/byzantine_test.go b/consensus/byzantine_test.go index e5233806b..f103beae5 100644 --- a/consensus/byzantine_test.go +++ b/consensus/byzantine_test.go @@ -30,7 +30,7 @@ func init() { // Heal partition and ensure A sees the commit func TestByzantine(t *testing.T) { N := 4 - css := randConsensusNet(N) + css := randConsensusNet(N, "consensus_byzantine_test", crankTimeoutPropose) switches := make([]*p2p.Switch, N) for i := 0; i < N; i++ { diff --git a/consensus/common_test.go b/consensus/common_test.go index 31061fe74..cf5df2baf 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -257,15 +257,15 @@ func randConsensusState(nValidators int) (*ConsensusState, []*validatorStub) { return cs, vss } -func randConsensusNet(nValidators int) []*ConsensusState { +func randConsensusNet(nValidators int, testName string, updateConfig func(cfg.Config)) []*ConsensusState { genDoc, privVals := randGenesisDoc(nValidators, false, 10) css := make([]*ConsensusState, nValidators) for i := 0; i < nValidators; i++ { db := dbm.NewMemDB() // each state needs its own db state := sm.MakeGenesisState(db, genDoc) state.Save() - thisConfig := tendermint_test.ResetConfig(Fmt("consensus_reactor_test_%d", i)) - resetConfigTimeouts(thisConfig) + thisConfig := tendermint_test.ResetConfig(Fmt("%s_%d", testName, i)) + updateConfig(thisConfig) EnsureDir(thisConfig.GetString("cs_wal_dir"), 0700) // dir for wal css[i] = newConsensusStateWithConfig(thisConfig, state, privVals[i], counter.NewCounterApplication(true)) } @@ -273,15 +273,15 @@ func randConsensusNet(nValidators int) []*ConsensusState { } // nPeers = nValidators + nNotValidator -func randConsensusNetWithPeers(nValidators int, nPeers int) []*ConsensusState { +func randConsensusNetWithPeers(nValidators, nPeers int, testName string, updateConfig func(cfg.Config)) []*ConsensusState { genDoc, privVals := randGenesisDoc(nValidators, false, int64(testMinPower)) css := make([]*ConsensusState, nPeers) for i := 0; i < nPeers; i++ { db := dbm.NewMemDB() // each state needs its own db state := sm.MakeGenesisState(db, genDoc) state.Save() - thisConfig := tendermint_test.ResetConfig(Fmt("consensus_reactor_test_%d", i)) - resetConfigTimeouts(thisConfig) + thisConfig := tendermint_test.ResetConfig(Fmt("%s_%d", testName, i)) + updateConfig(thisConfig) EnsureDir(thisConfig.GetString("cs_wal_dir"), 0700) // dir for wal var privVal *types.PrivValidator if i < nValidators { @@ -374,14 +374,8 @@ func getSwitchIndex(switches []*p2p.Switch, peer *p2p.Peer) int { // so we dont violate synchrony assumptions // TODO: make tests more robust to this instead (handle round changes) // XXX: especially a problem when running the race detector on circle -func resetConfigTimeouts(config cfg.Config) { +func crankTimeoutPropose(config cfg.Config) { logger.SetLogLevel("info") - //config.Set("log_level", "notice") config.Set("timeout_propose", 110000) // TODO: crank it to eleventy - // config.Set("timeout_propose_delta", 500) - // config.Set("timeout_prevote", 1000) - // config.Set("timeout_prevote_delta", 500) - // config.Set("timeout_precommit", 1000) - // config.Set("timeout_precommit_delta", 500) config.Set("timeout_commit", 1000) } diff --git a/consensus/reactor_test.go b/consensus/reactor_test.go index de0eaa18b..30d50ab7c 100644 --- a/consensus/reactor_test.go +++ b/consensus/reactor_test.go @@ -24,7 +24,7 @@ func init() { // Ensure a testnet makes blocks func TestReactor(t *testing.T) { N := 4 - css := randConsensusNet(N) + css := randConsensusNet(N, "consensus_reactor_test", crankTimeoutPropose) reactors := make([]*ConsensusReactor, N) eventChans := make([]chan interface{}, N) for i := 0; i < N; i++ { @@ -58,7 +58,7 @@ func TestReactor(t *testing.T) { func TestValidatorSetChanges(t *testing.T) { nPeers := 8 nVals := 4 - css := randConsensusNetWithPeers(nVals, nPeers) + css := randConsensusNetWithPeers(nVals, nPeers, "consensus_val_set_changes_test", crankTimeoutPropose) reactors := make([]*ConsensusReactor, nPeers) eventChans := make([]chan interface{}, nPeers) for i := 0; i < nPeers; i++ { @@ -119,8 +119,10 @@ func TestValidatorSetChanges(t *testing.T) { func waitForAndValidateBlock(t *testing.T, n int, activeVals map[string]struct{}, eventChans []chan interface{}, css []*ConsensusState, txs ...[]byte) { timeoutWaitGroup(t, n, func(wg *sync.WaitGroup, j int) { - newBlock := <-eventChans[j] - err := validateBlock(newBlock.(types.EventDataNewBlock).Block, activeVals) + newBlockI := <-eventChans[j] + newBlock := newBlockI.(types.EventDataNewBlock).Block + log.Info("Got block", "height", newBlock.Height, "validator", j) + err := validateBlock(newBlock, activeVals) if err != nil { t.Fatal(err) } diff --git a/types/block.go b/types/block.go index bcabe5b6b..d971b90b6 100644 --- a/types/block.go +++ b/types/block.go @@ -270,7 +270,7 @@ func (commit *Commit) BitArray() *BitArray { commit.bitArray.SetIndex(i, precommit != nil) } } - return commit.bitArray.Copy() + return commit.bitArray } func (commit *Commit) GetByIndex(index int) *Vote { From 81f91aebc23031c75916459b0037011d777796a1 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Sat, 17 Dec 2016 15:16:58 -0500 Subject: [PATCH 10/10] test: crank circle timeouts --- circle.yml | 2 +- test/test_cover.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/circle.yml b/circle.yml index 671ea14ca..2b54c66bb 100644 --- a/circle.yml +++ b/circle.yml @@ -30,7 +30,7 @@ dependencies: test: override: - "cd $REPO && make test_integrations": - timeout: 1200 + timeout: 1800 post: - "cd $REPO && bash <(curl -s https://codecov.io/bash)" diff --git a/test/test_cover.sh b/test/test_cover.sh index 52d45dc53..5b97b9d90 100644 --- a/test/test_cover.sh +++ b/test/test_cover.sh @@ -5,7 +5,7 @@ PKGS=$(go list github.com/tendermint/tendermint/... | grep -v /vendor/) set -e echo "mode: atomic" > coverage.txt for pkg in ${PKGS[@]}; do - go test -race -coverprofile=profile.out -covermode=atomic $pkg + go test -timeout 20m -race -coverprofile=profile.out -covermode=atomic $pkg if [ -f profile.out ]; then tail -n +2 profile.out >> coverage.txt; rm profile.out