Skip to content

Commit

Permalink
feat: support profiling block replay during abci handshake (backport #…
Browse files Browse the repository at this point in the history
…14953) (#14963)

Co-authored-by: yihuang <huang@crypto.com>
Co-authored-by: Julien Robert <julien@rbrt.fr>
  • Loading branch information
3 people committed Feb 9, 2023
1 parent d76ad98 commit a393336
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 35 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Ref: https://keepachangelog.com/en/1.0.0/

### Improvements

* (cli) [#14953](https://github.com/cosmos/cosmos-sdk/pull/14953) Enable profiling block replay during abci handshake with `--cpu-profile`.
* (store) [#14410](https://github.com/cosmos/cosmos-sdk/pull/14410) `rootmulti.Store.loadVersion` has validation to check if all the module stores' height is correct, it will error if any module store has incorrect height.
* (store) [#14189](https://github.com/cosmos/cosmos-sdk/pull/14189) Add config `iavl-lazy-loading` to enable lazy loading of iavl store, to improve start up time of archive nodes, add method `SetLazyLoading` to `CommitMultiStore` interface.

Expand Down
85 changes: 50 additions & 35 deletions server/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,15 @@ is performed. Note, when enabled, gRPC will also be automatically enabled.
withTM, _ := cmd.Flags().GetBool(flagWithTendermint)
if !withTM {
serverCtx.Logger.Info("starting ABCI without Tendermint")
return startStandAlone(serverCtx, appCreator)
return wrapCPUProfile(serverCtx, func() error {
return startStandAlone(serverCtx, appCreator)
})
}

// amino is needed here for backwards compatibility of REST routes
err = startInProcess(serverCtx, clientCtx, appCreator)
err = wrapCPUProfile(serverCtx, func() error {
return startInProcess(serverCtx, clientCtx, appCreator)
})
errCode, ok := err.(ErrorCode)
if !ok {
return err
Expand Down Expand Up @@ -257,27 +261,6 @@ func startStandAlone(ctx *Context, appCreator types.AppCreator) error {
func startInProcess(ctx *Context, clientCtx client.Context, appCreator types.AppCreator) error {
cfg := ctx.Config
home := cfg.RootDir
var cpuProfileCleanup func()

if cpuProfile := ctx.Viper.GetString(flagCPUProfile); cpuProfile != "" {
f, err := os.Create(cpuProfile)
if err != nil {
return err
}

ctx.Logger.Info("starting CPU profiler", "profile", cpuProfile)
if err := pprof.StartCPUProfile(f); err != nil {
return err
}

cpuProfileCleanup = func() {
ctx.Logger.Info("stopping CPU profiler", "profile", cpuProfile)
pprof.StopCPUProfile()
if err := f.Close(); err != nil {
ctx.Logger.Info("failed to close cpu-profile file", "profile", cpuProfile, "err", err.Error())
}
}
}

db, err := openDB(home, GetAppDBBackend(ctx.Viper))
if err != nil {
Expand All @@ -290,16 +273,11 @@ func startInProcess(ctx *Context, clientCtx client.Context, appCreator types.App
return err
}

// Clean up the traceWriter in the cpuProfileCleanup routine that is invoked
// when the server is shutting down.
fn := cpuProfileCleanup
cpuProfileCleanup = func() {
if fn != nil {
fn()
}

// if flagTraceStore is not used then traceWriter is nil
if traceWriter != nil {
// Clean up the traceWriter when the server is shutting down.
var traceWriterCleanup func()
// if flagTraceStore is not used then traceWriter is nil
if traceWriter != nil {
traceWriterCleanup = func() {
if err = traceWriter.Close(); err != nil {
ctx.Logger.Error("failed to close trace writer", "err", err)
}
Expand Down Expand Up @@ -524,8 +502,8 @@ func startInProcess(ctx *Context, clientCtx client.Context, appCreator types.App
_ = tmNode.Stop()
}

if cpuProfileCleanup != nil {
cpuProfileCleanup()
if traceWriterCleanup != nil {
traceWriterCleanup()
}

if apiSrv != nil {
Expand All @@ -545,3 +523,40 @@ func startTelemetry(cfg serverconfig.Config) (*telemetry.Metrics, error) {
}
return telemetry.New(cfg.Telemetry)
}

// wrapCPUProfile runs callback in a goroutine, then wait for quit signals.
func wrapCPUProfile(ctx *Context, callback func() error) error {
if cpuProfile := ctx.Viper.GetString(flagCPUProfile); cpuProfile != "" {
f, err := os.Create(cpuProfile)
if err != nil {
return err
}

ctx.Logger.Info("starting CPU profiler", "profile", cpuProfile)
if err := pprof.StartCPUProfile(f); err != nil {
return err
}

defer func() {
ctx.Logger.Info("stopping CPU profiler", "profile", cpuProfile)
pprof.StopCPUProfile()
if err := f.Close(); err != nil {
ctx.Logger.Info("failed to close cpu-profile file", "profile", cpuProfile, "err", err.Error())
}
}()
}

errCh := make(chan error)
go func() {
errCh <- callback()
}()

select {
case err := <-errCh:
return err

case <-time.After(types.ServerStartTime):
}

return WaitForQuitSignals()
}

0 comments on commit a393336

Please sign in to comment.