From 53022220f6699874f3ef5375d7e98d3b346cc4b0 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 27 Oct 2020 17:22:00 +0100 Subject: [PATCH] test: fix various E2E test issues (#5576) * Don't use state sync for nodes starting at initial height. * Also remove stopped containers when cleaning up. * Start nodes in order of startAt, mode, name to avoid full nodes starting before their seeds. * Tweak network waiting to avoid halts caused by validator changes and perturbations. * Disable most tests for seed nodes, which aren't always able to join consensus. * Disable `blockchain/v2` due to known bugs. --- test/e2e/generator/generate.go | 28 +++++++++++++++++++--------- test/e2e/networks/ci.toml | 9 ++++++--- test/e2e/pkg/testnet.go | 10 ++++++++++ test/e2e/runner/cleanup.go | 2 +- test/e2e/runner/main.go | 22 +++++++++++++++------- test/e2e/runner/start.go | 15 ++++++++++++++- test/e2e/tests/app_test.go | 11 +++++++++++ test/e2e/tests/block_test.go | 8 ++++++++ test/e2e/tests/validator_test.go | 4 ++++ 9 files changed, 88 insertions(+), 21 deletions(-) diff --git a/test/e2e/generator/generate.go b/test/e2e/generator/generate.go index 37779248e..3910913d2 100644 --- a/test/e2e/generator/generate.go +++ b/test/e2e/generator/generate.go @@ -29,7 +29,10 @@ var ( nodeABCIProtocols = uniformChoice{"unix", "tcp", "grpc", "builtin"} nodePrivvalProtocols = uniformChoice{"file", "unix", "tcp"} // FIXME v1 disabled due to https://github.com/tendermint/tendermint/issues/5444 - nodeFastSyncs = uniformChoice{"", "v0", "v2"} // "v1", + // FIXME v2 disabled due to: + // https://github.com/tendermint/tendermint/issues/5513 + // https://github.com/tendermint/tendermint/issues/5541 + nodeFastSyncs = uniformChoice{"", "v0"} // "v1", "v2" nodeStateSyncs = uniformChoice{false, true} nodePersistIntervals = uniformChoice{0, 1, 5} nodeSnapshotIntervals = uniformChoice{0, 3} @@ -90,7 +93,8 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er // First we generate 
seed nodes, starting at the initial height. for i := 1; i <= numSeeds; i++ { - manifest.Nodes[fmt.Sprintf("seed%02d", i)] = generateNode(r, e2e.ModeSeed, 0, false) + manifest.Nodes[fmt.Sprintf("seed%02d", i)] = generateNode( + r, e2e.ModeSeed, 0, manifest.InitialHeight, false) } // Next, we generate validators. We make sure a BFT quorum of validators start @@ -99,15 +103,16 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er nextStartAt := manifest.InitialHeight + 5 quorum := numValidators*2/3 + 1 for i := 1; i <= numValidators; i++ { - startAt := manifest.InitialHeight + startAt := int64(0) if i > quorum { startAt = nextStartAt nextStartAt += 5 } name := fmt.Sprintf("validator%02d", i) - manifest.Nodes[name] = generateNode(r, e2e.ModeValidator, startAt, i <= 2) + manifest.Nodes[name] = generateNode( + r, e2e.ModeValidator, startAt, manifest.InitialHeight, i <= 2) - if startAt == manifest.InitialHeight { + if startAt == 0 { (*manifest.Validators)[name] = int64(30 + r.Intn(71)) } else { manifest.ValidatorUpdates[fmt.Sprint(startAt+5)] = map[string]int64{ @@ -133,7 +138,8 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er startAt = nextStartAt nextStartAt += 5 } - manifest.Nodes[fmt.Sprintf("full%02d", i)] = generateNode(r, e2e.ModeFull, startAt, false) + manifest.Nodes[fmt.Sprintf("full%02d", i)] = generateNode( + r, e2e.ModeFull, startAt, manifest.InitialHeight, false) } // We now set up peer discovery for nodes. Seed nodes are fully meshed with @@ -183,7 +189,8 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er // here, since we need to know the overall network topology and startup // sequencing. 
func generateNode( - r *rand.Rand, mode e2e.Mode, startAt int64, forceArchive bool) *e2e.ManifestNode { + r *rand.Rand, mode e2e.Mode, startAt int64, initialHeight int64, forceArchive bool, +) *e2e.ManifestNode { node := e2e.ManifestNode{ Mode: string(mode), StartAt: startAt, @@ -206,8 +213,11 @@ func generateNode( } if node.Mode == "validator" { - node.Misbehaviors = nodeMisbehaviors.Choose(r).(misbehaviorOption). - atHeight(startAt + 5 + int64(r.Intn(10))) + misbehaveAt := startAt + 5 + int64(r.Intn(10)) + if startAt == 0 { + misbehaveAt += initialHeight - 1 + } + node.Misbehaviors = nodeMisbehaviors.Choose(r).(misbehaviorOption).atHeight(misbehaveAt) if len(node.Misbehaviors) != 0 { node.PrivvalProtocol = "file" } diff --git a/test/e2e/networks/ci.toml b/test/e2e/networks/ci.toml index 67cd6fbb6..05dd09e28 100644 --- a/test/e2e/networks/ci.toml +++ b/test/e2e/networks/ci.toml @@ -1,4 +1,4 @@ -# This testnet is (will be) run by CI, and attempts to cover a broad range of +# This testnet is run by CI, and attempts to cover a broad range of # functionality with a single network. 
initial_height = 1000 @@ -79,14 +79,17 @@ start_at = 1010 mode = "full" # FIXME Should use v1, but it won't catch up since some nodes don't have all blocks # https://github.com/tendermint/tendermint/issues/5444 -fast_sync = "v2" +fast_sync = "v0" persistent_peers = ["validator01", "validator02", "validator03", "validator04", "validator05"] perturb = ["restart"] [node.full02] start_at = 1015 mode = "full" -fast_sync = "v2" +# FIXME Should use v2, but it has concurrency bugs causing panics or halts +# https://github.com/tendermint/tendermint/issues/5513 +# https://github.com/tendermint/tendermint/issues/5541 +fast_sync = "v0" state_sync = true seeds = ["seed01"] perturb = ["restart"] diff --git a/test/e2e/pkg/testnet.go b/test/e2e/pkg/testnet.go index 3425d7041..fa559fe61 100644 --- a/test/e2e/pkg/testnet.go +++ b/test/e2e/pkg/testnet.go @@ -403,6 +403,16 @@ func (t Testnet) IPv6() bool { return t.IP.IP.To4() == nil } +// HasPerturbations returns whether the network has any perturbations. +func (t Testnet) HasPerturbations() bool { + for _, node := range t.Nodes { + if len(node.Perturbations) > 0 { + return true + } + } + return false +} + // LastMisbehaviorHeight returns the height of the last misbehavior. 
func (t Testnet) LastMisbehaviorHeight() int64 { lastHeight := int64(0) diff --git a/test/e2e/runner/cleanup.go b/test/e2e/runner/cleanup.go index 0af49db7d..d99ca54cf 100644 --- a/test/e2e/runner/cleanup.go +++ b/test/e2e/runner/cleanup.go @@ -32,7 +32,7 @@ func cleanupDocker() error { xargsR := `$(if [[ $OSTYPE == "linux-gnu"* ]]; then echo -n "-r"; fi)` err := exec("bash", "-c", fmt.Sprintf( - "docker container ls -q --filter label=e2e | xargs %v docker container rm -f", xargsR)) + "docker container ls -qa --filter label=e2e | xargs %v docker container rm -f", xargsR)) if err != nil { return err } diff --git a/test/e2e/runner/main.go b/test/e2e/runner/main.go index b20454e6b..d55fd95f2 100644 --- a/test/e2e/runner/main.go +++ b/test/e2e/runner/main.go @@ -69,25 +69,33 @@ func NewCLI() *CLI { if err := Start(cli.testnet); err != nil { return err } + if lastMisbehavior := cli.testnet.LastMisbehaviorHeight(); lastMisbehavior > 0 { - // wait for misbehaviors before starting perturbations - if err := WaitUntil(cli.testnet, lastMisbehavior+5); err != nil { + // wait for misbehaviors before starting perturbations. We do a separate + // wait for another 5 blocks, since the last misbehavior height may be + // in the past depending on network startup ordering. 
+ if err := WaitUntil(cli.testnet, lastMisbehavior); err != nil { return err } } - if err := Perturb(cli.testnet); err != nil { - return err - } if err := Wait(cli.testnet, 5); err != nil { // allow some txs to go through return err } + if cli.testnet.HasPerturbations() { + if err := Perturb(cli.testnet); err != nil { + return err + } + if err := Wait(cli.testnet, 5); err != nil { // allow some txs to go through + return err + } + } + loadCancel() if err := <-chLoadResult; err != nil { return err } - // wait for network to settle before tests - if err := Wait(cli.testnet, 5); err != nil { + if err := Wait(cli.testnet, 5); err != nil { // wait for network to settle before tests return err } if err := Test(cli.testnet); err != nil { diff --git a/test/e2e/runner/start.go b/test/e2e/runner/start.go index b755f8965..53acdd821 100644 --- a/test/e2e/runner/start.go +++ b/test/e2e/runner/start.go @@ -10,8 +10,21 @@ import ( func Start(testnet *e2e.Testnet) error { - // Sort nodes by starting order + // Nodes are already sorted by name. Sort them by mode then startAt, + // which gives the overall order startAt, mode, name. nodeQueue := testnet.Nodes + sort.SliceStable(nodeQueue, func(i, j int) bool { + a, b := nodeQueue[i], nodeQueue[j] + switch { + case a.Mode == b.Mode: + return false + case a.Mode == e2e.ModeSeed: + return true + case a.Mode == e2e.ModeValidator && b.Mode == e2e.ModeFull: + return true + } + return false + }) sort.SliceStable(nodeQueue, func(i, j int) bool { return nodeQueue[i].StartAt < nodeQueue[j].StartAt }) diff --git a/test/e2e/tests/app_test.go b/test/e2e/tests/app_test.go index 33eac1b40..82e788ebd 100644 --- a/test/e2e/tests/app_test.go +++ b/test/e2e/tests/app_test.go @@ -16,6 +16,9 @@ import ( // Tests that any initial state given in genesis has made it into the app. 
func TestApp_InitialState(t *testing.T) { testNode(t, func(t *testing.T, node e2e.Node) { + if node.Mode == e2e.ModeSeed { + return + } if len(node.Testnet.InitialState) == 0 { return } @@ -35,6 +38,10 @@ func TestApp_InitialState(t *testing.T) { // block and the node sync status. func TestApp_Hash(t *testing.T) { testNode(t, func(t *testing.T, node e2e.Node) { + if node.Mode == e2e.ModeSeed { + return + } + client, err := node.Client() require.NoError(t, err) info, err := client.ABCIInfo(ctx) @@ -56,6 +63,10 @@ func TestApp_Hash(t *testing.T) { // Tests that we can set a value and retrieve it. func TestApp_Tx(t *testing.T) { testNode(t, func(t *testing.T, node e2e.Node) { + if node.Mode == e2e.ModeSeed { + return + } + client, err := node.Client() require.NoError(t, err) diff --git a/test/e2e/tests/block_test.go b/test/e2e/tests/block_test.go index 23653d1e4..369b49d61 100644 --- a/test/e2e/tests/block_test.go +++ b/test/e2e/tests/block_test.go @@ -13,6 +13,10 @@ import ( func TestBlock_Header(t *testing.T) { blocks := fetchBlockChain(t) testNode(t, func(t *testing.T, node e2e.Node) { + if node.Mode == e2e.ModeSeed { + return + } + client, err := node.Client() require.NoError(t, err) status, err := client.Status(ctx) @@ -42,6 +46,10 @@ func TestBlock_Header(t *testing.T) { // Tests that the node contains the expected block range. func TestBlock_Range(t *testing.T) { testNode(t, func(t *testing.T, node e2e.Node) { + if node.Mode == e2e.ModeSeed { + return + } + client, err := node.Client() require.NoError(t, err) status, err := client.Status(ctx) diff --git a/test/e2e/tests/validator_test.go b/test/e2e/tests/validator_test.go index 47eb1555a..29f63bd92 100644 --- a/test/e2e/tests/validator_test.go +++ b/test/e2e/tests/validator_test.go @@ -14,6 +14,10 @@ import ( // scheduled validator updates. 
func TestValidator_Sets(t *testing.T) { testNode(t, func(t *testing.T, node e2e.Node) { + if node.Mode == e2e.ModeSeed { + return + } + client, err := node.Client() require.NoError(t, err) status, err := client.Status(ctx)