diff --git a/cmd/tendermint/commands/debug/debug.go b/cmd/tendermint/commands/debug/debug.go index 2bf76babc..478a03d55 100644 --- a/cmd/tendermint/commands/debug/debug.go +++ b/cmd/tendermint/commands/debug/debug.go @@ -2,38 +2,29 @@ package debug import ( "github.com/spf13/cobra" - "github.com/tendermint/tendermint/libs/log" ) -var ( - nodeRPCAddr string - profAddr string - frequency uint - +const ( flagNodeRPCAddr = "rpc-laddr" flagProfAddr = "pprof-laddr" flagFrequency = "frequency" - - logger = log.MustNewDefaultLogger(log.LogFormatPlain, log.LogLevelInfo) ) -// DebugCmd defines the root command containing subcommands that assist in -// debugging running Tendermint processes. -var DebugCmd = &cobra.Command{ - Use: "debug", - Short: "A utility to kill or watch a Tendermint process while aggregating debugging data", -} - -func init() { - DebugCmd.PersistentFlags().SortFlags = true - DebugCmd.PersistentFlags().StringVar( - &nodeRPCAddr, +func GetDebugCommand(logger log.Logger) *cobra.Command { + cmd := &cobra.Command{ + Use: "debug", + Short: "A utility to kill or watch a Tendermint process while aggregating debugging data", + } + cmd.PersistentFlags().SortFlags = true + cmd.PersistentFlags().String( flagNodeRPCAddr, "tcp://localhost:26657", - "the Tendermint node's RPC address (<host>:<port>)", + "the Tendermint node's RPC address (<host>:<port>)", ) - DebugCmd.AddCommand(killCmd) - DebugCmd.AddCommand(dumpCmd) + cmd.AddCommand(getKillCmd(logger)) + cmd.AddCommand(getDumpCmd(logger)) + return cmd + } diff --git a/cmd/tendermint/commands/debug/dump.go b/cmd/tendermint/commands/debug/dump.go index 0fb5c0f1a..d84f6e10a 100644 --- a/cmd/tendermint/commands/debug/dump.go +++ b/cmd/tendermint/commands/debug/dump.go @@ -13,78 +13,102 @@ import ( "github.com/tendermint/tendermint/config" "github.com/tendermint/tendermint/libs/cli" + "github.com/tendermint/tendermint/libs/log" rpchttp "github.com/tendermint/tendermint/rpc/client/http" ) -var dumpCmd = &cobra.Command{ - Use: "dump 
[output-directory]", - Short: "Continuously poll a Tendermint process and dump debugging data into a single location", - Long: `Continuously poll a Tendermint process and dump debugging data into a single +func getDumpCmd(logger log.Logger) *cobra.Command { + cmd := &cobra.Command{ + Use: "dump [output-directory]", + Short: "Continuously poll a Tendermint process and dump debugging data into a single location", + Long: `Continuously poll a Tendermint process and dump debugging data into a single location at a specified frequency. At each frequency interval, an archived and compressed file will contain node debugging information including the goroutine and heap profiles if enabled.`, - Args: cobra.ExactArgs(1), - RunE: dumpCmdHandler, -} - -func init() { - dumpCmd.Flags().UintVar( - &frequency, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + outDir := args[0] + if outDir == "" { + return errors.New("invalid output directory") + } + frequency, err := cmd.Flags().GetUint(flagFrequency) + if err != nil { + return fmt.Errorf("flag %q not defined: %w", flagFrequency, err) + } + + if frequency == 0 { + return errors.New("frequency must be positive") + } + + nodeRPCAddr, err := cmd.Flags().GetString(flagNodeRPCAddr) + if err != nil { + return fmt.Errorf("flag %q not defined: %w", flagNodeRPCAddr, err) + } + + profAddr, err := cmd.Flags().GetString(flagProfAddr) + if err != nil { + return fmt.Errorf("flag %q not defined: %w", flagProfAddr, err) + } + + if _, err := os.Stat(outDir); os.IsNotExist(err) { + if err := os.Mkdir(outDir, os.ModePerm); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + } + + rpc, err := rpchttp.New(nodeRPCAddr) + if err != nil { + return fmt.Errorf("failed to create new http client: %w", err) + } + + ctx := cmd.Context() + + home := viper.GetString(cli.HomeFlag) + conf := config.DefaultConfig() + conf = conf.SetRoot(home) + config.EnsureRoot(conf.RootDir) + + dumpArgs := 
dumpDebugDataArgs{ + conf: conf, + outDir: outDir, + profAddr: profAddr, + } + dumpDebugData(ctx, logger, rpc, dumpArgs) + + ticker := time.NewTicker(time.Duration(frequency) * time.Second) + for range ticker.C { + dumpDebugData(ctx, logger, rpc, dumpArgs) + } + + return nil + }, + } + cmd.Flags().Uint( flagFrequency, 30, "the frequency (seconds) in which to poll, aggregate and dump Tendermint debug data", ) - dumpCmd.Flags().StringVar( - &profAddr, + cmd.Flags().String( flagProfAddr, "", "the profiling server address (<host>:<port>)", ) -} - -func dumpCmdHandler(cmd *cobra.Command, args []string) error { - outDir := args[0] - if outDir == "" { - return errors.New("invalid output directory") - } - - if frequency == 0 { - return errors.New("frequency must be positive") - } - - if _, err := os.Stat(outDir); os.IsNotExist(err) { - if err := os.Mkdir(outDir, os.ModePerm); err != nil { - return fmt.Errorf("failed to create output directory: %w", err) - } - } - - rpc, err := rpchttp.New(nodeRPCAddr) - if err != nil { - return fmt.Errorf("failed to create new http client: %w", err) - } - ctx := cmd.Context() + return cmd - home := viper.GetString(cli.HomeFlag) - conf := config.DefaultConfig() - conf = conf.SetRoot(home) - config.EnsureRoot(conf.RootDir) - - dumpDebugData(ctx, outDir, conf, rpc) - - ticker := time.NewTicker(time.Duration(frequency) * time.Second) - for range ticker.C { - dumpDebugData(ctx, outDir, conf, rpc) - } +} - return nil +type dumpDebugDataArgs struct { + conf *config.Config + outDir string + profAddr string } -func dumpDebugData(ctx context.Context, outDir string, conf *config.Config, rpc *rpchttp.HTTP) { +func dumpDebugData(ctx context.Context, logger log.Logger, rpc *rpchttp.HTTP, args dumpDebugDataArgs) { start := time.Now().UTC() - tmpDir, err := os.MkdirTemp(outDir, "tendermint_debug_tmp") + tmpDir, err := os.MkdirTemp(args.outDir, "tendermint_debug_tmp") if err != nil { logger.Error("failed to create temporary directory", "dir", tmpDir, "error", err) 
return @@ -110,26 +134,26 @@ func dumpDebugData(ctx context.Context, outDir string, conf *config.Config, rpc } logger.Info("copying node WAL...") - if err := copyWAL(conf, tmpDir); err != nil { + if err := copyWAL(args.conf, tmpDir); err != nil { logger.Error("failed to copy node WAL", "error", err) return } - if profAddr != "" { + if args.profAddr != "" { logger.Info("getting node goroutine profile...") - if err := dumpProfile(tmpDir, profAddr, "goroutine", 2); err != nil { + if err := dumpProfile(tmpDir, args.profAddr, "goroutine", 2); err != nil { logger.Error("failed to dump goroutine profile", "error", err) return } logger.Info("getting node heap profile...") - if err := dumpProfile(tmpDir, profAddr, "heap", 2); err != nil { + if err := dumpProfile(tmpDir, args.profAddr, "heap", 2); err != nil { logger.Error("failed to dump heap profile", "error", err) return } } - outFile := filepath.Join(outDir, fmt.Sprintf("%s.zip", start.Format(time.RFC3339))) + outFile := filepath.Join(args.outDir, fmt.Sprintf("%s.zip", start.Format(time.RFC3339))) if err := zipDir(tmpDir, outFile); err != nil { logger.Error("failed to create and compress archive", "file", outFile, "error", err) } diff --git a/cmd/tendermint/commands/debug/kill.go b/cmd/tendermint/commands/debug/kill.go index 18351db12..a6c1ac7d8 100644 --- a/cmd/tendermint/commands/debug/kill.go +++ b/cmd/tendermint/commands/debug/kill.go @@ -15,89 +15,96 @@ import ( "github.com/tendermint/tendermint/config" "github.com/tendermint/tendermint/libs/cli" + "github.com/tendermint/tendermint/libs/log" rpchttp "github.com/tendermint/tendermint/rpc/client/http" ) -var killCmd = &cobra.Command{ - Use: "kill [pid] [compressed-output-file]", - Short: "Kill a Tendermint process while aggregating and packaging debugging data", - Long: `Kill a Tendermint process while also aggregating Tendermint process data +func getKillCmd(logger log.Logger) *cobra.Command { + cmd := &cobra.Command{ + Use: "kill [pid] [compressed-output-file]", + 
Short: "Kill a Tendermint process while aggregating and packaging debugging data", + Long: `Kill a Tendermint process while also aggregating Tendermint process data such as the latest node state, including consensus and networking state, go-routine state, and the node's WAL and config information. This aggregated data is packaged into a compressed archive. Example: $ tendermint debug kill 34255 /path/to/tm-debug.zip`, - Args: cobra.ExactArgs(2), - RunE: killCmdHandler, -} - -func killCmdHandler(cmd *cobra.Command, args []string) error { - ctx := cmd.Context() - pid, err := strconv.ParseInt(args[0], 10, 64) - if err != nil { - return err - } - - outFile := args[1] - if outFile == "" { - return errors.New("invalid output file") - } - - rpc, err := rpchttp.New(nodeRPCAddr) - if err != nil { - return fmt.Errorf("failed to create new http client: %w", err) - } - - home := viper.GetString(cli.HomeFlag) - conf := config.DefaultConfig() - conf = conf.SetRoot(home) - config.EnsureRoot(conf.RootDir) - - // Create a temporary directory which will contain all the state dumps and - // relevant files and directories that will be compressed into a file. 
- tmpDir, err := os.MkdirTemp(os.TempDir(), "tendermint_debug_tmp") - if err != nil { - return fmt.Errorf("failed to create temporary directory: %w", err) - } - defer os.RemoveAll(tmpDir) - - logger.Info("getting node status...") - if err := dumpStatus(ctx, rpc, tmpDir, "status.json"); err != nil { - return err - } - - logger.Info("getting node network info...") - if err := dumpNetInfo(ctx, rpc, tmpDir, "net_info.json"); err != nil { - return err - } - - logger.Info("getting node consensus state...") - if err := dumpConsensusState(ctx, rpc, tmpDir, "consensus_state.json"); err != nil { - return err - } - - logger.Info("copying node WAL...") - if err := copyWAL(conf, tmpDir); err != nil { - if !os.IsNotExist(err) { - return err - } - - logger.Info("node WAL does not exist; continuing...") - } - - logger.Info("copying node configuration...") - if err := copyConfig(home, tmpDir); err != nil { - return err - } - - logger.Info("killing Tendermint process") - if err := killProc(int(pid), tmpDir); err != nil { - return err + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + pid, err := strconv.ParseInt(args[0], 10, 64) + if err != nil { + return err + } + + outFile := args[1] + if outFile == "" { + return errors.New("invalid output file") + } + nodeRPCAddr, err := cmd.Flags().GetString(flagNodeRPCAddr) + if err != nil { + return fmt.Errorf("flag %q not defined: %w", flagNodeRPCAddr, err) + } + + rpc, err := rpchttp.New(nodeRPCAddr) + if err != nil { + return fmt.Errorf("failed to create new http client: %w", err) + } + + home := viper.GetString(cli.HomeFlag) + conf := config.DefaultConfig() + conf = conf.SetRoot(home) + config.EnsureRoot(conf.RootDir) + + // Create a temporary directory which will contain all the state dumps and + // relevant files and directories that will be compressed into a file. 
+ tmpDir, err := os.MkdirTemp(os.TempDir(), "tendermint_debug_tmp") + if err != nil { + return fmt.Errorf("failed to create temporary directory: %w", err) + } + defer os.RemoveAll(tmpDir) + + logger.Info("getting node status...") + if err := dumpStatus(ctx, rpc, tmpDir, "status.json"); err != nil { + return err + } + + logger.Info("getting node network info...") + if err := dumpNetInfo(ctx, rpc, tmpDir, "net_info.json"); err != nil { + return err + } + + logger.Info("getting node consensus state...") + if err := dumpConsensusState(ctx, rpc, tmpDir, "consensus_state.json"); err != nil { + return err + } + + logger.Info("copying node WAL...") + if err := copyWAL(conf, tmpDir); err != nil { + if !os.IsNotExist(err) { + return err + } + + logger.Info("node WAL does not exist; continuing...") + } + + logger.Info("copying node configuration...") + if err := copyConfig(home, tmpDir); err != nil { + return err + } + + logger.Info("killing Tendermint process") + if err := killProc(int(pid), tmpDir); err != nil { + return err + } + + logger.Info("archiving and compressing debug directory...") + return zipDir(tmpDir, outFile) + }, } - logger.Info("archiving and compressing debug directory...") - return zipDir(tmpDir, outFile) + return cmd } // killProc attempts to kill the Tendermint process with a given PID with an diff --git a/cmd/tendermint/main.go b/cmd/tendermint/main.go index 91ee89bea..90146fecd 100644 --- a/cmd/tendermint/main.go +++ b/cmd/tendermint/main.go @@ -43,7 +43,7 @@ func main() { commands.MakeInspectCommand(conf, logger), commands.MakeRollbackStateCommand(conf), commands.MakeKeyMigrateCommand(conf, logger), - debug.DebugCmd, + debug.GetDebugCommand(logger), commands.NewCompletionCmd(rcmd, true), )