From f53fb4630294692b6e09b1f9c19c99163bc092da Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Thu, 20 Apr 2017 18:24:42 -0400 Subject: [PATCH] add terraforce deployment method --- terraforce/README.md | 149 +++++++++++++++++++++++ terraforce/cluster/main.tf | 77 ++++++++++++ terraforce/examples/dummy/bins | 2 + terraforce/examples/dummy/run.sh | 8 ++ terraforce/examples/in-proc-linux/bins | 1 + terraforce/examples/in-proc-linux/run.sh | 7 ++ terraforce/examples/in-proc/bins | 1 + terraforce/examples/in-proc/run.sh | 7 ++ terraforce/main.tf | 30 +++++ terraforce/scripts/copy_run.sh | 10 ++ terraforce/scripts/init.sh | 43 +++++++ terraforce/scripts/query.sh | 11 ++ terraforce/scripts/reset.sh | 10 ++ terraforce/scripts/restart.sh | 9 ++ terraforce/scripts/start.sh | 10 ++ terraforce/scripts/stop.sh | 9 ++ terraforce/test.sh | 30 +++++ terraforce/transact/transact.go | 140 +++++++++++++++++++++ 18 files changed, 554 insertions(+) create mode 100644 terraforce/README.md create mode 100644 terraforce/cluster/main.tf create mode 100644 terraforce/examples/dummy/bins create mode 100644 terraforce/examples/dummy/run.sh create mode 100644 terraforce/examples/in-proc-linux/bins create mode 100644 terraforce/examples/in-proc-linux/run.sh create mode 100644 terraforce/examples/in-proc/bins create mode 100644 terraforce/examples/in-proc/run.sh create mode 100644 terraforce/main.tf create mode 100644 terraforce/scripts/copy_run.sh create mode 100644 terraforce/scripts/init.sh create mode 100644 terraforce/scripts/query.sh create mode 100644 terraforce/scripts/reset.sh create mode 100644 terraforce/scripts/restart.sh create mode 100644 terraforce/scripts/start.sh create mode 100644 terraforce/scripts/stop.sh create mode 100644 terraforce/test.sh create mode 100644 terraforce/transact/transact.go diff --git a/terraforce/README.md b/terraforce/README.md new file mode 100644 index 000000000..a91e21aa8 --- /dev/null +++ b/terraforce/README.md @@ -0,0 +1,149 @@ +# Stack + +This is 
a stripped down version of https://github.com/segmentio/stack +plus some shell scripts. + +It is responsible for the following: + + - spin up a cluster of nodes + - copy config files for a tendermint testnet to each node + - copy Linux binaries for tendermint and the app to each node + - start tendermint on every node + +# How it Works + +To use, a user must only provide a directory containing two files: `bins` and `run.sh`. + +The `bins` file is a list of binaries, for instance: + +``` +$GOPATH/bin/tendermint +$GOPATH/bin/dummy +``` + +and the `run.sh` specifies how those binaries ought to be started: + +``` +#! /bin/bash + +if [[ "$SEEDS" != "" ]]; then + SEEDS_FLAG="--seeds=$SEEDS" +fi + +./dummy --persist .tendermint/data/dummy_data >> app.log 2>&1 & +./tendermint node --log_level=info $SEEDS_FLAG >> tendermint.log 2>&1 & +``` + +This lets you specify exactly which versions of Tendermint and the application are to be used, +and how they ought to be started. + +Note that these binaries *MUST* be compiled for Linux. +If you are not on Linux, you can compile binaries for Linux using `go build` with the `GOOS` variable: + +``` +GOOS=linux go build -o $GOPATH/bin/tendermint-linux $GOPATH/src/github.com/tendermint/tendermint/cmd/tendermint +``` + +This cross-compilation must be done for each binary you want to copy over. + +If you want to use an application that requires more than just a few binaries, you may need to do more manual work, +for instance using `terraforce` to set up the development environment on every machine. + +# Dependencies + +We use `terraform` for spinning up the machines, +and a custom rolled tool, `terraforce`, +for running commands on many machines in parallel. +You can download terraform here: https://www.terraform.io/downloads.html +To download terraforce, run `go get github.com/ebuchman/terraforce` + +We use `tendermint` itself to generate files for a testnet.
+You can install `tendermint` with + +``` +cd $GOPATH/src/github.com/tendermint/tendermint +glide install +go install ./cmd/tendermint +``` + +You also need to set the `DIGITALOCEAN_TOKEN` environment variable so that terraform can +spin up nodes on DigitalOcean. + +This stack is currently some terraform and a bunch of shell scripts, +so it's helpful to work out of a directory containing everything. +Either change directory to `$GOPATH/src/github.com/tendermint/tendermint/test/net` +or make a copy of that directory and change to it. All commands are expected to be executed from there. + +For terraform to work, you must first run `terraform get`. + +# Create + +To create a cluster with 4 nodes, run + +``` +terraform apply +``` + +To use a different number of nodes, change the `desired_capacity` parameter in the `main.tf`. + +Note that terraform keeps track of the current state of your infrastructure, +so if you change the `desired_capacity` and run `terraform apply` again, it will add or remove nodes as necessary. + +If you think that's amazing, so do we. + +To get some info about the cluster, run `terraform output`. + +See the [terraform docs](https://www.terraform.io/docs/index.html) for more details. + +To tear down the cluster, run `terraform destroy`. + +# Initialize + +Now that we have a cluster up and running, let's generate the necessary files for a Tendermint node and copy them over. +A Tendermint node needs, at the least, a `priv_validator.json` and a `genesis.json`. +To generate files for the nodes, run + +``` +tendermint testnet 4 mytestnet +``` + +This will create the directory `mytestnet`, containing one directory for each of the 4 nodes. +Each node directory contains a unique `priv_validator.json` and a `genesis.json`, +where the `genesis.json` contains the public keys of all `priv_validator.json` files. + +If you want to add more files to each node for your particular app, you'll have to add them to each of the node directories.
+ +Now we can copy everything over to the cluster. +If you are on Linux, run + +``` +bash scripts/init.sh 4 mytestnet examples/in-proc +``` + +Otherwise (if you are not on Linux), make sure you ran + +``` +GOOS=linux go build -o $GOPATH/bin/tendermint-linux $GOPATH/src/github.com/tendermint/tendermint/cmd/tendermint +``` + +and now run + +``` +bash scripts/init.sh 4 mytestnet examples/in-proc-linux +``` + +# Start + +Finally, to start Tendermint on all the nodes, run + +``` +bash scripts/start.sh 4 +``` + +# Check + +Query the status of all your nodes: + +``` +bash scripts/query.sh 4 status +``` diff --git a/terraforce/cluster/main.tf b/terraforce/cluster/main.tf new file mode 100644 index 000000000..99435a03e --- /dev/null +++ b/terraforce/cluster/main.tf @@ -0,0 +1,77 @@ +/** + * Cluster of droplets on DigitalOcean (DO) + * + */ + +variable "name" { + description = "The cluster name, e.g cdn" +} + +variable "environment" { + description = "Environment tag, e.g prod" +} + +variable "image_id" { + description = "Image ID" +} + +variable "regions" { + description = "Regions to launch in" + type = "list" +} + +variable "key_ids" { + description = "SSH keys to use" + type = "list" +} + +variable "instance_size" { + description = "The instance size to use, e.g 2gb" +} + +variable "desired_capacity" { + description = "Desired instance count" + default = 3 +} + +#----------------------- +# Instances + +resource "digitalocean_droplet" "cluster" { + # set the droplet name (cluster name + index), image and instance size + name = "${var.name}${count.index}" + image = "${var.image_id}" + size = "${var.instance_size}" + + # the `element` function wraps the index (modulo the list length), cycling through the regions + region = "${element(var.regions, count.index)}" + + ssh_keys = "${var.key_ids}" + + count = "${var.desired_capacity}" + lifecycle = { + prevent_destroy = false + } +} + +#----------------------- + +// The cluster name, e.g cdn +output "name" { + value = "${var.name}" +} + +// The list of cluster instance ids +output "instances" { + value = ["${digitalocean_droplet.cluster.*.id}"]
+} + +// The list of cluster instance private ips +output "private_ips" { + value = ["${digitalocean_droplet.cluster.*.ipv4_address_private}"] +} + +// The list of cluster instance public ips +output "public_ips" { + value = ["${digitalocean_droplet.cluster.*.ipv4_address}"] +} diff --git a/terraforce/examples/dummy/bins b/terraforce/examples/dummy/bins new file mode 100644 index 000000000..0890780df --- /dev/null +++ b/terraforce/examples/dummy/bins @@ -0,0 +1,2 @@ +$GOPATH/bin/tendermint +$GOPATH/bin/dummy diff --git a/terraforce/examples/dummy/run.sh b/terraforce/examples/dummy/run.sh new file mode 100644 index 000000000..469b260c6 --- /dev/null +++ b/terraforce/examples/dummy/run.sh @@ -0,0 +1,8 @@ +#! /bin/bash + +if [[ "$SEEDS" != "" ]]; then + SEEDS_FLAG="--seeds=$SEEDS" +fi + +./dummy --persist .tendermint/data/dummy_data >> app.log 2>&1 & +./tendermint node --log_level=info $SEEDS_FLAG >> tendermint.log 2>&1 & diff --git a/terraforce/examples/in-proc-linux/bins b/terraforce/examples/in-proc-linux/bins new file mode 100644 index 000000000..fa0c1052d --- /dev/null +++ b/terraforce/examples/in-proc-linux/bins @@ -0,0 +1 @@ +$GOPATH/bin/tendermint-linux diff --git a/terraforce/examples/in-proc-linux/run.sh b/terraforce/examples/in-proc-linux/run.sh new file mode 100644 index 000000000..ff233950e --- /dev/null +++ b/terraforce/examples/in-proc-linux/run.sh @@ -0,0 +1,7 @@ +#! /bin/bash + +if [[ "$SEEDS" != "" ]]; then + SEEDS_FLAG="--seeds=$SEEDS" +fi + +./tendermint-linux node --proxy_app=dummy --log_level=note $SEEDS_FLAG >> tendermint.log 2>&1 & diff --git a/terraforce/examples/in-proc/bins b/terraforce/examples/in-proc/bins new file mode 100644 index 000000000..f48f5e353 --- /dev/null +++ b/terraforce/examples/in-proc/bins @@ -0,0 +1 @@ +$GOPATH/bin/tendermint diff --git a/terraforce/examples/in-proc/run.sh b/terraforce/examples/in-proc/run.sh new file mode 100644 index 000000000..0627439b4 --- /dev/null +++ b/terraforce/examples/in-proc/run.sh @@ -0,0 +1,7 @@ +#!
/bin/bash + +if [[ "$SEEDS" != "" ]]; then + SEEDS_FLAG="--seeds=$SEEDS" +fi + +./tendermint node --proxy_app=dummy --log_level=note $SEEDS_FLAG >> tendermint.log 2>&1 & diff --git a/terraforce/main.tf b/terraforce/main.tf new file mode 100644 index 000000000..008438403 --- /dev/null +++ b/terraforce/main.tf @@ -0,0 +1,30 @@ +module "cluster" { + source = "./cluster" + environment = "test" + name = "tendermint-testnet" + + # ids of the SSH keys to install; list the keys of your account with: curl -X GET -H "Content-Type: application/json" -H "Authorization: Bearer $DIGITALOCEAN_TOKEN" "https://api.digitalocean.com/v2/account/keys" + key_ids = [8163311] + + image_id = "ubuntu-14-04-x64" + desired_capacity = 4 + instance_size = "2gb" + + regions = ["AMS2", "FRA1", "LON1", "NYC2", "SFO2", "SGP1", "TOR1"] +} + + +provider "digitalocean" { +} + +output "public_ips" { + value = "${module.cluster.public_ips}" +} + +output "private_ips" { + value = "${join(",",module.cluster.private_ips)}" +} + +output "seeds" { + value = "${join(":46656,",module.cluster.public_ips)}:46656" +} diff --git a/terraforce/scripts/copy_run.sh b/terraforce/scripts/copy_run.sh new file mode 100644 index 000000000..31c8eb117 --- /dev/null +++ b/terraforce/scripts/copy_run.sh @@ -0,0 +1,10 @@ +#! /bin/bash +set -u + +N=$1 # number of nodes +RUN=$2 # path to run script + +N_=$((N-1)) + +# copy the run script to every node as run.sh +terraforce scp --user root --ssh-key $HOME/.ssh/id_rsa --machines "[0-$N_]" $RUN run.sh diff --git a/terraforce/scripts/init.sh b/terraforce/scripts/init.sh new file mode 100644 index 000000000..15b8e3247 --- /dev/null +++ b/terraforce/scripts/init.sh @@ -0,0 +1,43 @@ +#! /bin/bash +set -u + +N=$1 # number of nodes +TESTNET=$2 # path to folder containing testnet info +CONFIG=$3 # path to folder containing `bins` and `run.sh` files + +if [[ ! -f $CONFIG/bins ]]; then + echo "config folder ($CONFIG) must contain bins file" + exit 1 +fi +if [[ !
-f $CONFIG/run.sh ]]; then + echo "config folder ($CONFIG) must contain run.sh file" + exit 1 +fi + +KEY=$HOME/.ssh/id_rsa + +FLAGS="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" + +N_=$((N-1)) # index of the last node (0-based) + +MACH_ROOT="$TESTNET/mach?" + + +# create the .tendermint directory on every node +terraforce ssh --user root --ssh-key $KEY --machines "[0-$N_]" mkdir .tendermint + +# copy over genesis/priv_val +terraforce scp --user root --ssh-key $KEY --iterative --machines "[0-$N_]" "$MACH_ROOT/priv_validator.json" .tendermint/priv_validator.json +terraforce scp --user root --ssh-key $KEY --iterative --machines "[0-$N_]" "$MACH_ROOT/genesis.json" .tendermint/genesis.json + +# copy the run script +terraforce scp --user root --ssh-key $KEY --machines "[0-$N_]" $CONFIG/run.sh run.sh + +# copy the binaries listed in the bins file and make them executable +while read line; do + local_bin=$(eval echo $line) + remote_bin=$(basename $local_bin) + echo $local_bin + terraforce scp --user root --ssh-key $KEY --machines "[0-$N_]" $local_bin $remote_bin + terraforce ssh --user root --ssh-key $KEY --machines "[0-$N_]" chmod +x $remote_bin +done <$CONFIG/bins diff --git a/terraforce/scripts/query.sh b/terraforce/scripts/query.sh new file mode 100644 index 000000000..c77cf61dc --- /dev/null +++ b/terraforce/scripts/query.sh @@ -0,0 +1,11 @@ +#! /bin/bash +set -u + +N=$1 # number of nodes +QUERY=$2 # path to request from each node's RPC port, e.g. status + +N_=$((N-1)) + +# query the local RPC endpoint (port 46657) on every node +terraforce ssh --user root --ssh-key $HOME/.ssh/id_rsa --machines "[0-$N_]" curl -s localhost:46657/$QUERY + diff --git a/terraforce/scripts/reset.sh b/terraforce/scripts/reset.sh new file mode 100644 index 000000000..2bef5324c --- /dev/null +++ b/terraforce/scripts/reset.sh @@ -0,0 +1,10 @@ +#!
/bin/bash +set -u + +N=$1 # number of nodes + +N_=$((N-1)) + +# remove the data directory and reset the priv validator on every node +terraforce ssh --user root --ssh-key $HOME/.ssh/id_rsa --machines "[0-$N_]" rm -rf .tendermint/data +terraforce ssh --user root --ssh-key $HOME/.ssh/id_rsa --machines "[0-$N_]" ./tendermint unsafe_reset_priv_validator diff --git a/terraforce/scripts/restart.sh b/terraforce/scripts/restart.sh new file mode 100644 index 000000000..03ff1918e --- /dev/null +++ b/terraforce/scripts/restart.sh @@ -0,0 +1,9 @@ +#! /bin/bash +set -u + +N=$1 # number of nodes + +N_=$((N-1)) + +# start via run.sh; NOTE: nothing is stopped here, run stop.sh first if the nodes are still running +terraforce ssh --user root --ssh-key $HOME/.ssh/id_rsa --machines "[0-$N_]" SEEDS=$(terraform output seeds) bash run.sh diff --git a/terraforce/scripts/start.sh b/terraforce/scripts/start.sh new file mode 100644 index 000000000..e72a8da68 --- /dev/null +++ b/terraforce/scripts/start.sh @@ -0,0 +1,10 @@ +#! /bin/bash +set -u + +N=$1 # number of nodes + +N_=$((N-1)) + +# start all tendermint nodes, passing the seeds from the terraform output +terraforce ssh --user root --ssh-key $HOME/.ssh/id_rsa --machines "[0-$N_]" SEEDS=$(terraform output seeds) bash run.sh + diff --git a/terraforce/scripts/stop.sh b/terraforce/scripts/stop.sh new file mode 100644 index 000000000..bdb55869c --- /dev/null +++ b/terraforce/scripts/stop.sh @@ -0,0 +1,9 @@ +#! /bin/bash +set -u + +N=$1 # number of nodes + +N_=$((N-1)) + +# stop all tendermint +terraforce ssh --user root --ssh-key $HOME/.ssh/id_rsa --machines "[0-$N_]" killall tendermint diff --git a/terraforce/test.sh b/terraforce/test.sh new file mode 100644 index 000000000..d69dc9180 --- /dev/null +++ b/terraforce/test.sh @@ -0,0 +1,30 @@ +#!
/bin/bash +# Launches a testnet: creates the cluster, generates and copies the node files, then starts the nodes. +cd $GOPATH/src/github.com/tendermint/tendermint + +TEST_PATH=./test/net/new + +N=4 # number of nodes +TESTNET_DIR=mytestnet # directory the testnet files are generated into + +# install deps +# TODO: we should build a Docker image and +# really do everything that follows in the container +# bash setup.sh + + +# launch infra +terraform get +terraform apply + +# create testnet files +tendermint testnet -n $N -dir $TESTNET_DIR + +# expects a linux tendermint binary to be built already +bash scripts/init.sh $N $TESTNET_DIR test/net/examples/in-proc + +# start all $N nodes (not a hard-coded count); the testnet should be running afterwards :) +bash scripts/start.sh $N + + + diff --git a/terraforce/transact/transact.go b/terraforce/transact/transact.go new file mode 100644 index 000000000..bb4dabd1d --- /dev/null +++ b/terraforce/transact/transact.go @@ -0,0 +1,140 @@ +package main + +import ( + "crypto/rand" + "encoding/binary" + "encoding/hex" + "flag" + "fmt" + "os" + "strconv" + "strings" + "sync" + "time" + + "github.com/tendermint/go-rpc/client" + rpctypes "github.com/tendermint/go-rpc/types" +) + +func main() { + flag.Parse() + args := flag.Args() + if len(args) < 2 { + fmt.Println("transact.go expects at least two arguments (ntxs, hosts)") + os.Exit(1) + } + + nTxS, hostS := args[0], args[1] + nTxs, err := strconv.Atoi(nTxS) + if err != nil { + fmt.Println("ntxs must be an integer:", err) + os.Exit(1) + } + + hosts := strings.Split(hostS, ",") + + errCh := make(chan error, 1000) + + wg := new(sync.WaitGroup) + wg.Add(len(hosts)) + start := time.Now() + fmt.Printf("Sending %d txs on every host %v\n", nTxs, hosts) + for i, host := range hosts { + go broadcastTxsToHost(wg, errCh, i, host, nTxs, 0) + } + wg.Wait() + fmt.Println("Done broadcasting txs.
Took", time.Since(start)) + +} + +// broadcastTxsToHost opens a websocket RPC connection to valHost and writes +// broadcast_tx_async requests to it: nTxs of them, or, when nTxs == 0, +// indefinitely at one per millisecond. valI is the index of the host; it is +// used in the log output and encoded into every tx. +// +// A companion goroutine counts the responses, starting from txCount. It calls +// wg.Done once nTxs responses have been counted, and re-invokes +// broadcastTxsToHost with the running count when the client quits, when a +// write fails, or (only when nTxs == 0) when nothing arrives for +// reconnectSleepSeconds. errCh is only handed on to those re-invocations. +// +// If the initial connection fails the process exits, unless nTxs == 0, in +// which case the connection is retried after reconnectSleepSeconds. +func broadcastTxsToHost(wg *sync.WaitGroup, errCh chan error, valI int, valHost string, nTxs int, txCount int) { + reconnectSleepSeconds := time.Second * 1 + + // thisStart := time.Now() + // cli := rpcclient.NewClientURI(valHost + ":46657") + fmt.Println("Connecting to host to broadcast txs", valI, valHost) + cli := rpcclient.NewWSClient(valHost, "/websocket") + if _, err := cli.Start(); err != nil { + if nTxs == 0 { + time.Sleep(reconnectSleepSeconds) + broadcastTxsToHost(wg, errCh, valI, valHost, nTxs, txCount) + return + } + fmt.Printf("Error starting websocket connection to val%d (%s): %v\n", valI, valHost, err) + os.Exit(1) + } + + // Buffered so the single send below never blocks, even when the receiving + // goroutine has already returned (e.g. after cli.Quit). + reconnect := make(chan struct{}, 1) + go func(count int) { + // One reusable timer implements the idle timeout. A fresh Ticker per + // iteration would never be stopped and would leak a running ticker for + // every received event. + idle := time.NewTimer(reconnectSleepSeconds) + defer idle.Stop() + LOOP: + for { + select { + case <-cli.ResultsCh: + count++ + // nTxs == 0 means just loop forever + if nTxs > 0 && count == nTxs { + break LOOP + } + case err := <-cli.ErrorsCh: + fmt.Println("err: val", valI, valHost, err) + case <-cli.Quit: + broadcastTxsToHost(wg, errCh, valI, valHost, nTxs, count) + return + case <-reconnect: + broadcastTxsToHost(wg, errCh, valI, valHost, nTxs, count) + return + case <-idle.C: + if nTxs == 0 { + cli.Stop() + broadcastTxsToHost(wg, errCh, valI, valHost, nTxs, count) + return + } + } + // Restart the idle timeout; drain a tick that fired in the meantime + // so the next wait starts from a clean channel. + if !idle.Stop() { + select { + case <-idle.C: + default: + } + } + idle.Reset(reconnectSleepSeconds) + } + fmt.Printf("Received all responses from node %d (%s)\n", valI, valHost) + wg.Done() + }(txCount) + i := 0 + for { + /* if i%(nTxs/4) == 0 { + fmt.Printf("Have sent %d txs to node %d. Total time so far: %v\n", i, valI, time.Since(thisStart)) + }*/ + + if !cli.IsRunning() { + return + } + + tx := generateTx(i, valI) + if err := cli.WriteJSON(rpctypes.RPCRequest{ + JSONRPC: "2.0", + ID: "", + Method: "broadcast_tx_async", + Params: []interface{}{hex.EncodeToString(tx)}, + }); err != nil { + fmt.Printf("Error sending tx %d to validator %d: %v. 
Attempt reconnect\n", i, valI, err) + // hand over to the receiving goroutine, which re-invokes broadcastTxsToHost + reconnect <- struct{}{} + return + } + i++ + if nTxs > 0 && i >= nTxs { + break + } else if nTxs == 0 { + time.Sleep(time.Millisecond * 1) + } + } + fmt.Printf("Done sending %d txs to node %d (%s)\n", nTxs, valI, valHost) +} + +// generateTx returns a 250-byte tx: valI is uvarint-encoded at offset 0, i is +// uvarint-encoded at offset 32, and the last 16 bytes are read from +// crypto/rand. The process exits if crypto/rand fails. +func generateTx(i, valI int) []byte { + // a tx encodes the validator index, the tx number, and some random junk + // TODO: read random bytes into more of the tx + tx := make([]byte, 250) + binary.PutUvarint(tx[:32], uint64(valI)) + binary.PutUvarint(tx[32:64], uint64(i)) + if _, err := rand.Read(tx[234:]); err != nil { + fmt.Println("err reading from crypto/rand", err) + os.Exit(1) + } + return tx +}