package debug

import (
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"syscall"
	"time"

	"github.com/spf13/cobra"
	"github.com/spf13/viper"

	"github.com/tendermint/tendermint/config"
	"github.com/tendermint/tendermint/libs/cli"
	rpchttp "github.com/tendermint/tendermint/rpc/client/http"
)

// killCmd defines the "kill" sub-command. It takes exactly two positional
// arguments, the PID of the process to kill and the path of the compressed
// output file, and delegates to killCmdHandler.
var killCmd = &cobra.Command{
	Use:   "kill [pid] [compressed-output-file]",
	Short: "Kill a Tendermint process while aggregating and packaging debugging data",
	Long:  `Kill a Tendermint process while also aggregating Tendermint process data such as the latest node state, including consensus and networking state, go-routine state, and the node's WAL and config information. This aggregated data is packaged into a compressed archive. Example: $ tendermint debug kill 34255 /path/to/tm-debug.zip`,
	Args:  cobra.ExactArgs(2),
	RunE:  killCmdHandler,
}

// killCmdHandler implements the "kill" command.
//
// args[0] is the PID of the process to kill and args[1] is the path of the
// archive to produce. The handler collects the node status, network info,
// consensus state, WAL and configuration into a temporary directory, kills the
// process via killProc (which also writes stacktrace.out into that directory),
// and finally hands the directory to zipDir to produce the output file. The
// temporary directory is removed when the handler returns.
//
// An error is returned if the PID is not a positive integer, the output path
// is empty, the RPC client cannot be created, or any collection step fails. A
// missing WAL is tolerated and only logged.
func killCmdHandler(cmd *cobra.Command, args []string) error {
	ctx := cmd.Context()

	// Atoi range-checks against the platform's int, so an oversized value is
	// rejected instead of being silently truncated by a later conversion.
	pid, err := strconv.Atoi(args[0])
	if err != nil {
		return err
	}

	// Only a positive PID names a single process; kill(2) treats zero and
	// negative values as process-group targets. Reject them before doing any
	// work.
	if pid <= 0 {
		return fmt.Errorf("invalid pid %d: must be a positive integer", pid)
	}

	outFile := args[1]
	if outFile == "" {
		return errors.New("invalid output file")
	}

	rpc, err := rpchttp.New(nodeRPCAddr)
	if err != nil {
		return fmt.Errorf("failed to create new http client: %w", err)
	}

	home := viper.GetString(cli.HomeFlag)
	conf := config.DefaultConfig()
	conf = conf.SetRoot(home)
	config.EnsureRoot(conf.RootDir)

	// Create a temporary directory which will contain all the state dumps and
	// relevant files and directories that will be compressed into a file.
	tmpDir, err := os.MkdirTemp(os.TempDir(), "tendermint_debug_tmp")
	if err != nil {
		return fmt.Errorf("failed to create temporary directory: %w", err)
	}
	defer os.RemoveAll(tmpDir)

	logger.Info("getting node status...")
	if err := dumpStatus(ctx, rpc, tmpDir, "status.json"); err != nil {
		return err
	}

	logger.Info("getting node network info...")
	if err := dumpNetInfo(ctx, rpc, tmpDir, "net_info.json"); err != nil {
		return err
	}

	logger.Info("getting node consensus state...")
	if err := dumpConsensusState(ctx, rpc, tmpDir, "consensus_state.json"); err != nil {
		return err
	}

	logger.Info("copying node WAL...")
	if err := copyWAL(conf, tmpDir); err != nil {
		// errors.Is also recognizes a not-exist error that has been wrapped
		// with additional context, which os.IsNotExist does not.
		if !errors.Is(err, os.ErrNotExist) {
			return err
		}

		logger.Info("node WAL does not exist; continuing...")
	}

	logger.Info("copying node configuration...")
	if err := copyConfig(home, tmpDir); err != nil {
		return err
	}

	logger.Info("killing Tendermint process")
	if err := killProc(pid, tmpDir); err != nil {
		return err
	}

	logger.Info("archiving and compressing debug directory...")
	return zipDir(tmpDir, outFile)
}

// killProc attempts to kill the Tendermint process with a given PID with an
// ABORT signal which should result in a goroutine stacktrace. The PID's STDERR
// is tailed and piped to the file stacktrace.out under the directory dir.
//
// An error is returned if the output file cannot be created, the tail command
// cannot be started, or waiting on the tail command fails for a reason other
// than the command exiting unsuccessfully (which is expected, since it is
// killed on purpose). Failures to find, signal or kill a process are only
// reported on STDERR and do not cause an error to be returned.
func killProc(pid int, dir string) error {
	// pipe STDERR output from tailing the Tendermint process to a file
	//
	// NOTE: This will only work on UNIX systems.
	cmd := exec.Command("tail", "-f", fmt.Sprintf("/proc/%d/fd/2", pid)) // nolint: gosec

	outFile, err := os.Create(filepath.Join(dir, "stacktrace.out"))
	if err != nil {
		return err
	}
	defer outFile.Close()

	cmd.Stdout = outFile
	cmd.Stderr = outFile

	if err := cmd.Start(); err != nil {
		return err
	}

	// kill the underlying Tendermint process and subsequent tailing process
	go func() {
		// Killing the Tendermint process with the '-ABRT|-6' signal will result in
		// a goroutine stacktrace.
		p, err := os.FindProcess(pid)
		if err != nil {
			fmt.Fprintf(os.Stderr, "failed to find PID to kill Tendermint process: %s\n", err)
		} else if err = p.Signal(syscall.SIGABRT); err != nil {
			fmt.Fprintf(os.Stderr, "failed to kill Tendermint process: %s\n", err)
		}

		// allow some time to allow the Tendermint process to be killed
		//
		// TODO: We should 'wait' for a kill to succeed (e.g. poll for PID until it
		// cannot be found). Regardless, this should be ample time.
		time.Sleep(5 * time.Second)

		// Stop the tail process so that cmd.Wait below returns.
		if err := cmd.Process.Kill(); err != nil {
			fmt.Fprintf(os.Stderr, "failed to kill Tendermint process output redirection: %s\n", err)
		}
	}()

	if err := cmd.Wait(); err != nil {
		// only return an error not invoked by a manual kill
		var exitErr *exec.ExitError
		if !errors.As(err, &exitErr) {
			return err
		}
	}

	return nil
}