Nightly E2E testnet runs, with load and startup timeouts tuned for large networks.

Summary of the changes carried by this patch:
  * .github/workflows/e2e-nightly.yml (new): scheduled (cron '0 2 * * *') and
    manually dispatchable workflow. It builds the docker image, generator and
    runner, generates testnets with `-g 4`, runs one matrix job per group
    ('00'..'03'), and posts to Slack when a job fails.
  * test/e2e/networks/ci.toml: seed01 and seed02 now reference each other via
    `seeds` instead of `persistent_peers`.
  * test/e2e/runner/load.go: worker count changes from a fixed 50 to
    64 / len(testnet.Nodes), with a floor of 1; the stall timeout goes from
    15s to 30s; the start-up log line now reports the worker count.
  * test/e2e/runner/start.go: waitForNode timeouts are raised from 10s to 15s
    (initial nodes) and from 30s to 1 minute (nodes with a StartAt height).

Review notes:
  * NOTE(review): `64 / len(testnet.Nodes)` panics with a division by zero if
    the testnet has no nodes. Confirm an empty node list is rejected before
    Load is called; otherwise add an explicit check that returns an error.
  * NOTE(review): the new log line calls fmt.Sprintf, but the import block of
    load.go is not touched by this patch. Confirm `fmt` is already imported.
  * NOTE(review): rtCamp/action-slack-notify is pinned to an abbreviated commit
    SHA (e9db0ef). Confirm GitHub still resolves it; a full-length SHA is the
    safer pin.
  * NOTE(review): the matrix groups must stay in sync with the generator's
    `-g 4` argument, as the workflow comment says.
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy of this patch. Hunk line counts were checked against the hunk headers,
    but run `git apply --check` (optionally with --ignore-whitespace) before
    applying.

diff --git a/.github/workflows/e2e-nightly.yml b/.github/workflows/e2e-nightly.yml
new file mode 100644
index 000000000..96b406961
--- /dev/null
+++ b/.github/workflows/e2e-nightly.yml
@@ -0,0 +1,43 @@
+# Runs randomly generated E2E testnets nightly.
+name: e2e-nightly
+on:
+  workflow_dispatch:  # allow running workflow manually
+  schedule:
+    - cron: '0 2 * * *'
+
+jobs:
+  e2e-nightly-test:
+    # Run parallel jobs for the listed testnet groups (must match the
+    # ./build/generator -g flag)
+    strategy:
+      fail-fast: false
+      matrix:
+        group: ['00', '01', '02', '03']
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Build
+        working-directory: test/e2e
+        # Run make jobs in parallel, since we can't run steps in parallel.
+        run: make -j2 docker generator runner
+
+      - name: Generate testnets
+        working-directory: test/e2e
+        run: ./build/generator -g 4 -d networks/nightly
+
+      - name: Run testnets in group ${{ matrix.group }}
+        working-directory: test/e2e
+        run: sudo ./run-multiple.sh networks/nightly/*-group${{ matrix.group }}-*.toml
+
+      - name: Notify Slack on failure
+        uses: rtCamp/action-slack-notify@e9db0ef
+        if: ${{ failure() }}
+        env:
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+          SLACK_CHANNEL: tendermint-internal
+          SLACK_USERNAME: Nightly E2E Test Failure
+          SLACK_COLOR: danger
+          SLACK_MESSAGE: Nightly E2E test failed (group ${{ matrix.group }})
+          SLACK_FOOTER: ''
diff --git a/test/e2e/networks/ci.toml b/test/e2e/networks/ci.toml
index e366bb6c5..12478ef99 100644
--- a/test/e2e/networks/ci.toml
+++ b/test/e2e/networks/ci.toml
@@ -26,11 +26,11 @@ validator05 = 50
 
 [node.seed01]
 mode = "seed"
-persistent_peers = ["seed02"]
+seeds = ["seed02"]
 
 [node.seed02]
 mode = "seed"
-persistent_peers = ["seed01"]
+seeds = ["seed01"]
 
 [node.validator01]
 seeds = ["seed01"]
diff --git a/test/e2e/runner/load.go b/test/e2e/runner/load.go
index a59b3f503..495c573d3 100644
--- a/test/e2e/runner/load.go
+++ b/test/e2e/runner/load.go
@@ -16,9 +16,17 @@ import (
 // Load generates transactions against the network until the given
 // context is cancelled.
 func Load(ctx context.Context, testnet *e2e.Testnet) error {
-	concurrency := 50
+	// Since transactions are executed across all nodes in the network, we need
+	// to reduce transaction load for larger networks to avoid using too much
+	// CPU. This gives high-throughput small networks and low-throughput large ones.
+	// This also limits the number of TCP connections, since each worker has
+	// a connection to all nodes.
+	concurrency := 64 / len(testnet.Nodes)
+	if concurrency == 0 {
+		concurrency = 1
+	}
 	initialTimeout := 1 * time.Minute
-	stallTimeout := 15 * time.Second
+	stallTimeout := 30 * time.Second
 
 	chTx := make(chan types.Tx)
 	chSuccess := make(chan types.Tx)
@@ -26,7 +34,7 @@ func Load(ctx context.Context, testnet *e2e.Testnet) error {
 	defer cancel()
 
 	// Spawn job generator and processors.
-	logger.Info("Starting transaction load...")
+	logger.Info(fmt.Sprintf("Starting transaction load (%v workers)...", concurrency))
 	started := time.Now()
 
 	go loadGenerate(ctx, chTx)
diff --git a/test/e2e/runner/start.go b/test/e2e/runner/start.go
index bf52e190e..915222faf 100644
--- a/test/e2e/runner/start.go
+++ b/test/e2e/runner/start.go
@@ -24,7 +24,7 @@ func Start(testnet *e2e.Testnet) error {
 		if err := execCompose(testnet.Dir, "up", "-d", node.Name); err != nil {
 			return err
 		}
-		if _, err := waitForNode(node, 0, 10*time.Second); err != nil {
+		if _, err := waitForNode(node, 0, 15*time.Second); err != nil {
 			return err
 		}
 		logger.Info(fmt.Sprintf("Node %v up on http://127.0.0.1:%v", node.Name, node.ProxyPort))
@@ -56,7 +56,7 @@ func Start(testnet *e2e.Testnet) error {
 		if err := execCompose(testnet.Dir, "up", "-d", node.Name); err != nil {
 			return err
 		}
-		status, err := waitForNode(node, node.StartAt, 30*time.Second)
+		status, err := waitForNode(node, node.StartAt, 1*time.Minute)
 		if err != nil {
 			return err
 		}