From 5a47154f6c05e2783a109678c7d3b3c1a00b24df Mon Sep 17 00:00:00 2001 From: Anil Kumar Kammari Date: Fri, 6 Aug 2021 01:30:08 +0530 Subject: [PATCH] feat: Add backup option for cosmovisor (#9652) ## Description Ref: https://github.com/cosmos/cosmos-sdk/issues/9616#issuecomment-873051972 depends: #8590 This PR adds a full backup option for cosmovisor. `UNSAFE_SKIP_BACKUP` is a newly introduced `env` setting. - If `false` (default, **recommended**), cosmovisor will try to take a backup and then upgrade. In case of failure while taking the backup, it will just halt the process there and won't try the upgrade. - If `true`, cosmovisor will try to upgrade without any backup. This setting makes it hard to recover from a failed upgrade. Node operators either need to sync from a healthy node or use a snapshot from others. --- ### Author Checklist *All items are required. Please add a note to the item if the item is not applicable and please add links to any relevant follow up issues.* I have... - [x] included the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] added `!` to the type prefix if API or client breaking change - [ ] targeted the correct branch (see [PR Targeting](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#pr-targeting)) - [x] provided a link to the relevant issue or specification - [ ] followed the guidelines for [building modules](https://github.com/cosmos/cosmos-sdk/blob/master/docs/building-modules) - [ ] included the necessary unit and integration [tests](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#testing) - [ ] added a changelog entry to `CHANGELOG.md` - [x] included comments for [documenting Go code](https://blog.golang.org/godoc) - [x] updated the relevant documentation or specification - [x] reviewed "Files changed" and left comments if necessary - [ ] confirmed all CI checks have passed ### Reviewers Checklist *All items are required. 
Please add a note if the item is not applicable and please add your handle next to the items reviewed if you only reviewed selected items.* I have... - [ ] confirmed the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] confirmed `!` in the type prefix if API or client breaking change - [ ] confirmed all author checklist items have been addressed - [ ] reviewed state machine logic - [ ] reviewed API design and naming - [ ] reviewed documentation is accurate - [ ] reviewed tests and test coverage - [ ] manually tested (if applicable) --- cosmovisor/README.md | 1 + cosmovisor/args.go | 3 ++ cosmovisor/cmd/cosmovisor/main.go | 1 + cosmovisor/process.go | 53 +++++++++++++++++++++++++++++++ cosmovisor/process_test.go | 4 +-- 5 files changed, 60 insertions(+), 2 deletions(-) diff --git a/cosmovisor/README.md b/cosmovisor/README.md index e263966a49a0..5b3aa5d2cb11 100644 --- a/cosmovisor/README.md +++ b/cosmovisor/README.md @@ -22,6 +22,7 @@ All arguments passed to `cosmovisor` will be passed to the application binary (a * `DAEMON_NAME` is the name of the binary itself (e.g. `gaiad`, `regend`, `simd`, etc.). * `DAEMON_ALLOW_DOWNLOAD_BINARIES` (*optional*), if set to `true`, will enable auto-downloading of new binaries (for security reasons, this is intended for full nodes rather than validators). By default, `cosmovisor` will not auto-download new binaries. * `DAEMON_RESTART_AFTER_UPGRADE` (*optional*), if set to `true`, will restart the subprocess with the same command-line arguments and flags (but with the new binary) after a successful upgrade. By default, `cosmovisor` stops running after an upgrade and requires the system administrator to manually restart it. Note that `cosmovisor` will not auto-restart the subprocess if there was an error. +* `UNSAFE_SKIP_BACKUP` (defaults to `false`), if set to `false`, will backup the data before trying the upgrade. 
Otherwise it will upgrade directly without doing any backup. Taking a backup is useful (and recommended) in case of failures and when a rollback is needed. It is advised to use the backup option, i.e., `UNSAFE_SKIP_BACKUP=false` ## Folder Layout diff --git a/cosmovisor/args.go b/cosmovisor/args.go index 60cbb7d60167..c5b4d71af58c 100644 --- a/cosmovisor/args.go +++ b/cosmovisor/args.go @@ -24,6 +24,7 @@ type Config struct { AllowDownloadBinaries bool RestartAfterUpgrade bool LogBufferSize int + UnsafeSkipBackup bool } // Root returns the root directory where all info lives @@ -113,6 +114,8 @@ func GetConfigFromEnv() (*Config, error) { cfg.LogBufferSize = bufio.MaxScanTokenSize } + cfg.UnsafeSkipBackup = os.Getenv("UNSAFE_SKIP_BACKUP") == "true" + if err := cfg.validate(); err != nil { return nil, err } diff --git a/cosmovisor/cmd/cosmovisor/main.go b/cosmovisor/cmd/cosmovisor/main.go index a165acab38f6..f02f1190d522 100644 --- a/cosmovisor/cmd/cosmovisor/main.go +++ b/cosmovisor/cmd/cosmovisor/main.go @@ -22,6 +22,7 @@ func Run(args []string) error { } doUpgrade, err := cosmovisor.LaunchProcess(cfg, args, os.Stdout, os.Stderr) + // if RestartAfterUpgrade, we launch after a successful upgrade (only condition LaunchProcess returns nil) for cfg.RestartAfterUpgrade && err == nil && doUpgrade { doUpgrade, err = cosmovisor.LaunchProcess(cfg, args, os.Stdout, os.Stderr) diff --git a/cosmovisor/process.go b/cosmovisor/process.go index 6a67f65e162e..2058c72f4384 100644 --- a/cosmovisor/process.go +++ b/cosmovisor/process.go @@ -2,15 +2,21 @@ package cosmovisor import ( "bufio" + "encoding/json" "fmt" "io" + "io/ioutil" "log" "os" "os/exec" "os/signal" + "path/filepath" "strings" "sync" "syscall" + "time" + + "github.com/otiai10/copy" ) // LaunchProcess runs a subprocess and returns when the subprocess exits, @@ -70,12 +76,59 @@ func LaunchProcess(cfg *Config, args []string, stdout, stderr io.Writer) (bool, } if upgradeInfo != nil { + if err := doBackup(cfg); err != nil { + return false, err 
+ } + return true, DoUpgrade(cfg, upgradeInfo) } return false, nil } +func doBackup(cfg *Config) error { + // take backup if `UNSAFE_SKIP_BACKUP` is not set. + if !cfg.UnsafeSkipBackup { + // check if upgrade-info.json is not empty. + var uInfo UpgradeInfo + upgradeInfoFile, err := ioutil.ReadFile(filepath.Join(cfg.Home, "data", "upgrade-info.json")) + if err != nil { + return fmt.Errorf("error while reading upgrade-info.json: %w", err) + } + + err = json.Unmarshal(upgradeInfoFile, &uInfo) + if err != nil { + return err + } + + if uInfo.Name == "" { + return fmt.Errorf("upgrade-info.json is empty") + } + + // a destination directory, Format YYYY-MM-DD + st := time.Now() + stStr := fmt.Sprintf("%d-%d-%d", st.Year(), st.Month(), st.Day()) + dst := filepath.Join(cfg.Home, fmt.Sprintf("data"+"-backup-%s", stStr)) + + fmt.Printf("starting to take backup of data directory at time %s", st) + + // copy the $DAEMON_HOME/data to a backup dir + err = copy.Copy(filepath.Join(cfg.Home, "data"), dst) + + if err != nil { + return fmt.Errorf("error while taking data backup: %w", err) + } + + // backup is done, lets check endtime to calculate total time taken for backup process + et := time.Now() + timeTaken := et.Sub(st) + fmt.Printf("backup saved at location: %s, completed at time: %s\n"+ + "time taken to complete the backup: %s", dst, et, timeTaken) + } + + return nil +} + // WaitResult is used to wrap feedback on cmd state with some mutex logic. 
// This is needed as multiple go-routines can affect this - two read pipes that can trigger upgrade // As well as the command, which can fail diff --git a/cosmovisor/process_test.go b/cosmovisor/process_test.go index 6dc964f21ee0..0b966b22bc64 100644 --- a/cosmovisor/process_test.go +++ b/cosmovisor/process_test.go @@ -23,7 +23,7 @@ func TestProcessTestSuite(t *testing.T) { // and args are passed through func (s *processTestSuite) TestLaunchProcess() { home := copyTestData(s.T(), "validate") - cfg := &cosmovisor.Config{Home: home, Name: "dummyd"} + cfg := &cosmovisor.Config{Home: home, Name: "dummyd", UnsafeSkipBackup: true} // should run the genesis binary and produce expected output var stdout, stderr bytes.Buffer @@ -65,7 +65,7 @@ func (s *processTestSuite) TestLaunchProcessWithDownloads() { // zip_binary -> "chain3" = ref_zipped -> zip_directory // zip_directory no upgrade home := copyTestData(s.T(), "download") - cfg := &cosmovisor.Config{Home: home, Name: "autod", AllowDownloadBinaries: true} + cfg := &cosmovisor.Config{Home: home, Name: "autod", AllowDownloadBinaries: true, UnsafeSkipBackup: true} // should run the genesis binary and produce expected output var stdout, stderr bytes.Buffer