diff --git a/.github/workflows/build_targets.yaml b/.github/workflows/build_targets.yaml index f442b52572..3da9d0fab4 100644 --- a/.github/workflows/build_targets.yaml +++ b/.github/workflows/build_targets.yaml @@ -100,7 +100,7 @@ jobs: key: ${{ inputs.images-cache-key }} - name: Upload saved images - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: ${{ inputs.images-artifact-name }} path: ./output/saved-images @@ -134,7 +134,7 @@ jobs: key: ${{ inputs.e2e-binary-cache-key }} - name: Upload e2e binary - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: ${{ inputs.e2e-binary-name }} path: ./e2e-test/image/e2e/bin @@ -169,7 +169,7 @@ jobs: echo "chart-name=$chart_name" >> $GITHUB_OUTPUT - name: Upload chart - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: ${{ inputs.chart-artifact-name }} path: ${{ steps.build-chart.outputs.chart-path }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 697b5ee43a..d4d761cce4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -49,14 +49,14 @@ jobs: steps: - name: Download saved images id: download-images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: ${{ needs.build-targets.outputs.images-artifact-name }} path: ./output/${{ needs.build-targets.outputs.images-artifact-name }} - name: Download chart id: download-chart - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: ${{ needs.build-targets.outputs.chart-artifact-name }} path: ./output/${{ needs.build-targets.outputs.chart-artifact-name }} diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml index 78c7eca7d2..29575bd3a5 100644 --- a/.github/workflows/e2e_test.yaml +++ b/.github/workflows/e2e_test.yaml @@ -40,13 +40,13 @@ jobs: uses: actions/checkout@v2 - name: download saved images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: ${{ inputs.images-artifact-name }} path: ./output/saved-images - name: download e2e binary - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: ${{ inputs.e2e-binary-name }} path: ./output/e2e-binary diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index e2a7b734dd..e5092c8212 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -22,7 +22,7 @@ jobs: - name: Download saved images id: download-images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: ${{ inputs.images-artifact-name }} path: ./output/${{ inputs.images-artifact-name }} @@ -85,7 +85,7 @@ jobs: - name: Post run - upload kubernetes cluster info dump if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: integration-test-kubernetes-cluster-info-dump path: cluster-info-dump diff --git a/api/v1alpha1/cloudstackhostchaos_types.go b/api/v1alpha1/cloudstackhostchaos_types.go index b82f0bb217..e91ec9283a 100644 --- a/api/v1alpha1/cloudstackhostchaos_types.go +++ b/api/v1alpha1/cloudstackhostchaos_types.go @@ -80,6 +80,15 @@ type CloudStackHostChaosSpec struct { // CloudStackHostChaosStatus represents the status of a CloudStackChaos. type CloudStackHostChaosStatus struct { ChaosStatus `json:",inline"` + + // Instances keeps track of the affected hosts and vms + // +optional + Instances map[string]CloudStackHostAffected `json:"affectedHosts,omitempty"` +} + +type CloudStackHostAffected struct { + Name string `json:"name,omitempty"` + VMs []string `json:"vms,omitempty"` } type CloudStackHostChaosSelector struct { @@ -116,3 +125,7 @@ func (selector *CloudStackHostChaosSelector) Id() string { func (obj *CloudStackHostChaos) GetSelectorSpecs() map[string]interface{} { return map[string]interface{}{".": &obj.Spec.Selector} } + +func (obj *CloudStackHostChaos) GetCustomStatus() interface{} { + return &obj.Status.Instances +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 4634e997bd..8db9d8cfbb 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -906,6 +906,26 @@ func (in *CloudStackAPIConfig) DeepCopy() *CloudStackAPIConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CloudStackHostAffected) DeepCopyInto(out *CloudStackHostAffected) { + *out = *in + if in.VMs != nil { + in, out := &in.VMs, &out.VMs + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CloudStackHostAffected. +func (in *CloudStackHostAffected) DeepCopy() *CloudStackHostAffected { + if in == nil { + return nil + } + out := new(CloudStackHostAffected) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CloudStackHostChaos) DeepCopyInto(out *CloudStackHostChaos) { *out = *in @@ -1036,6 +1056,13 @@ func (in *CloudStackHostChaosSpec) DeepCopy() *CloudStackHostChaosSpec { func (in *CloudStackHostChaosStatus) DeepCopyInto(out *CloudStackHostChaosStatus) { *out = *in in.ChaosStatus.DeepCopyInto(&out.ChaosStatus) + if in.Instances != nil { + in, out := &in.Instances, &out.Instances + *out = make(map[string]CloudStackHostAffected, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CloudStackHostChaosStatus. diff --git a/config/crd/bases/chaos-mesh.org_cloudstackhostchaos.yaml b/config/crd/bases/chaos-mesh.org_cloudstackhostchaos.yaml index ab46f6eff1..10bbd51136 100644 --- a/config/crd/bases/chaos-mesh.org_cloudstackhostchaos.yaml +++ b/config/crd/bases/chaos-mesh.org_cloudstackhostchaos.yaml @@ -120,6 +120,18 @@ spec: status: description: CloudStackHostChaosStatus represents the status of a CloudStackChaos. properties: + affectedHosts: + additionalProperties: + properties: + name: + type: string + vms: + items: + type: string + type: array + type: object + description: Instances keeps track of the affected hosts and vms + type: object conditions: description: Conditions represents the current global condition of the chaos diff --git a/controllers/chaosimpl/cloudstackhost/hoststop/impl.go b/controllers/chaosimpl/cloudstackhost/hoststop/impl.go index 6dfb5c672b..5c2c3b767a 100644 --- a/controllers/chaosimpl/cloudstackhost/hoststop/impl.go +++ b/controllers/chaosimpl/cloudstackhost/hoststop/impl.go @@ -17,10 +17,9 @@ package hoststop import ( "context" + "crypto/rand" "encoding/json" - "math/rand" - "strings" - "sync" + "math/big" "time" "github.com/apache/cloudstack-go/v2/cloudstack" @@ -47,23 +46,35 @@ const ( StateUp = "Up" StateRunning = "Running" StateStopped = "Stopped" + + HostStartingPhase = "Injected/HostStarting" + HostStartedPhase = "Injected/HostStarted" + VMsStartedPhase = "Injected/VMsStarted" + NodesReadyPhase = "Injected/NodesReady" + NodesUncordonedPhase = "Injected/NodesUncordoned" ) -var retryOpts = []retry.Option{retry.Attempts(12), retry.Delay(5 * time.Second), retry.DelayType(retry.FixedDelay), retry.LastErrorOnly(true)} -var waitRetryOpts = []retry.Option{retry.Attempts(20), retry.Delay(30 * time.Second), retry.DelayType(retry.FixedDelay), retry.LastErrorOnly(true)} +var retryOpts = []retry.Option{retry.Attempts(5), retry.Delay(1 * time.Second), retry.DelayType(retry.FixedDelay), retry.LastErrorOnly(true)} func (impl *Impl) Apply(ctx context.Context, index int, records []*v1alpha1.Record, obj v1alpha1.InnerObject) (v1alpha1.Phase, error) { - cloudstackchaos := obj.(*v1alpha1.CloudStackHostChaos) - spec := cloudstackchaos.Spec + chaos, ok := obj.(*v1alpha1.CloudStackHostChaos) + if !ok { + return v1alpha1.NotInjected, errors.New("chaos is not CloudstackHostChaos") + } + if chaos.Status.Instances == nil { + chaos.Status.Instances = make(map[string]v1alpha1.CloudStackHostAffected) + } + spec := chaos.Spec + record := records[index] - client, err := utils.GetCloudStackClient(ctx, impl.Client, cloudstackchaos) + client, err := utils.GetCloudStackClient(ctx, impl.Client, chaos) if err != nil { return v1alpha1.NotInjected, errors.Wrap(err, "creating cloudstack api client") } var selector v1alpha1.CloudStackHostChaosSelector - if err := json.Unmarshal([]byte(records[index].Id), &selector); err != nil { - return v1alpha1.NotInjected, errors.Wrapf(err, "decoding selector: %s", records[index].Id) + if err := json.Unmarshal([]byte(record.Id), &selector); err != nil { + return v1alpha1.NotInjected, errors.Wrapf(err, "decoding selector: %s", record.Id) } params := utils.SelectorToListParams(&selector) @@ -72,21 +83,44 @@ func (impl *Impl) Apply(ctx context.Context, index int, records []*v1alpha1.Reco resp, err := retry.DoWithData(func() (*cloudstack.ListHostsResponse, error) { return client.Host.ListHosts(params) - }) + }, retryOpts...) + if err != nil { - impl.Log.Error(err, "Failed to list matching hosts", "selector", records[index].Id) + impl.Log.Error(err, "Failed to list matching hosts", "selector", record.Id) return v1alpha1.NotInjected, errors.Wrap(err, "listing hosts") } if len(resp.Hosts) == 0 { - impl.Log.Info("No hosts matching criteria") + impl.Log.Info("No hosts matching criteria", "criteria", record.Id) return v1alpha1.Injected, nil } - h := resp.Hosts[rand.Intn(len(resp.Hosts))] + h := randomHost(resp.Hosts) - impl.Log.Info("Stopping host", "id", h.Id, "name", h.Name, "dry-run", spec.DryRun) + vmResp, err := retry.DoWithData(func() (*cloudstack.ListVirtualMachinesResponse, error) { + params := client.VirtualMachine.NewListVirtualMachinesParams() + params.SetHostid(h.Id) + return client.VirtualMachine.ListVirtualMachines(params) + }, retryOpts...) + if err != nil { + return v1alpha1.NotInjected, errors.Wrapf(err, "list vms on host %s", h.Name) + } + vms := []string{} + for _, vm := range vmResp.VirtualMachines { + vms = append(vms, vm.Name) + } + isActive, err := impl.isClusterActive(ctx, vms) + if err != nil { + return v1alpha1.NotInjected, errors.Wrapf(err, "check if cluster is active %s", h.Name) + } + if !isActive { + impl.Log.Info("Cluster inactive, skipping", "name", h.Name) + return v1alpha1.Injected, nil + } + + chaos.Status.Instances[record.Id] = v1alpha1.CloudStackHostAffected{Name: h.Name, VMs: vms} + impl.Log.Info("Stopping host", "id", h.Id, "name", h.Name, "dry-run", spec.DryRun) if !spec.DryRun { params := client.OutofbandManagement.NewIssueOutOfBandManagementPowerActionParams(ActionOff, h.Id) if err := retry.Do(func() error { @@ -104,66 +138,81 @@ func (impl *Impl) Apply(ctx context.Context, index int, records []*v1alpha1.Reco } func (impl *Impl) Recover(ctx context.Context, index int, records []*v1alpha1.Record, obj v1alpha1.InnerObject) (v1alpha1.Phase, error) { - impl.Log.Info("Starting hypervisor recovery") - - cloudstackchaos := obj.(*v1alpha1.CloudStackHostChaos) - spec := cloudstackchaos.Spec - - client, err := utils.GetCloudStackClient(ctx, impl.Client, cloudstackchaos) - if err != nil { - return v1alpha1.Injected, errors.Wrap(err, "creating cloudstack api client") + chaos, ok := obj.(*v1alpha1.CloudStackHostChaos) + if !ok { + return v1alpha1.NotInjected, errors.New("chaos is not CloudstackHostChaos") } + record := records[index] - var selector v1alpha1.CloudStackHostChaosSelector - if err := json.Unmarshal([]byte(records[index].Id), &selector); err != nil { - return v1alpha1.Injected, errors.Wrapf(err, "decoding selector: %s", records[index].Id) + affected := chaos.Status.Instances[record.Id] + if affected.Name == "" { + impl.Log.Info("Nothing to recover") + return v1alpha1.NotInjected, nil } - params := utils.SelectorToListParams(&selector) - params.SetOutofbandmanagementenabled(true) - params.SetOutofbandmanagementpowerstate("Off") + hostName := affected.Name + vms := affected.VMs + spec := chaos.Spec - resp, err := retry.DoWithData(func() (*cloudstack.ListHostsResponse, error) { - return client.Host.ListHosts(params) - }, retryOpts...) + if spec.DryRun { + impl.Log.Info("Hypervisor recovery dry run", "host", hostName, "vms", vms) + return v1alpha1.NotInjected, nil + } + + client, err := utils.GetCloudStackClient(ctx, impl.Client, chaos) if err != nil { - impl.Log.Error(err, "Failed to list offline hosts", "selector", records[index].Id) - return v1alpha1.Injected, errors.Wrap(err, "listing hosts") + return v1alpha1.Injected, errors.Wrap(err, "creating cloudstack api client") } - for _, h := range resp.Hosts { - impl.Log.Info("Starting host", "id", h.Id, "name", h.Name, "dry-run", spec.DryRun) + switch record.Phase { + case v1alpha1.Injected: + impl.Log.Info("Starting hypervisor recovery", "host", hostName, "vms", vms) + if err := impl.startHost(client, hostName); err != nil { + return v1alpha1.Injected, err + } + return HostStartingPhase, nil - if spec.DryRun { - continue + case HostStartingPhase: + if err := impl.ensureStartedHost(client, hostName); err != nil { + return HostStartingPhase, err } - if err := retry.Do(func() error { - _, err := client.OutofbandManagement.IssueOutOfBandManagementPowerAction(client.OutofbandManagement.NewIssueOutOfBandManagementPowerActionParams(ActionOn, h.Id)) - return err - }, retryOpts...); err != nil { - impl.Log.Error(err, "Failed to start host", "host", h.Name) - return v1alpha1.Injected, errors.Wrapf(err, "starting host %s", h.Name) + + return HostStartedPhase, nil + + case HostStartedPhase: + if err := impl.startVMs(client, vms); err != nil { + return HostStartedPhase, errors.Wrapf(err, "failed to start vms on host %s", hostName) } - if err := waitForHostToBeUp(client, h.Id); err != nil { - impl.Log.Error(err, "Host failed to start", "host", h.Name) - return v1alpha1.Injected, err + return VMsStartedPhase, nil + + case VMsStartedPhase: + if err := impl.ensureK8sNodesReady(ctx, vms); err != nil { + // jump back to HostStartedPhase to make sure all VMs are started + return HostStartedPhase, err } - impl.Log.Info("Started host", "id", h.Id, "name", h.Name) - } + return NodesReadyPhase, nil - if err := impl.startVMs(client, spec.DryRun); err != nil { - return v1alpha1.Injected, err - } - if err := impl.destroyStuckSystemVMs(client, spec.DryRun); err != nil { - return v1alpha1.Injected, err - } - if err := impl.uncordonK8sNodes(ctx, spec.DryRun); err != nil { - return v1alpha1.Injected, err - } + case NodesReadyPhase: + impl.Log.Info("Will uncordon ready nodes", "host", hostName) + if err := impl.uncordonK8sNodes(ctx, vms); err != nil { + // jump back to HostStartedPhase to make sure all VMs are started + return HostStartedPhase, errors.Wrapf(err, "failed to uncordon nodes on host %s", hostName) + } + + return NodesUncordonedPhase, nil - return v1alpha1.NotInjected, nil + case NodesUncordonedPhase: + impl.Log.Info("Will destroy stuck system VMs", "host", hostName) + if err := impl.destroyStuckSystemVMs(client); err != nil { + return NodesReadyPhase, errors.Wrap(err, "failed to destroy stuck system VMs") + } + return v1alpha1.NotInjected, nil + + default: + panic("unknown recovery phase: " + record.Phase) + } } func NewImpl(c client.Client, log logr.Logger) *Impl { @@ -173,61 +222,71 @@ func NewImpl(c client.Client, log logr.Logger) *Impl { } } -func waitForVmToBeRunning(client *cloudstack.CloudStackClient, vmId string) error { - return retry.Do(func() error { - vm, _, err := client.VirtualMachine.GetVirtualMachineByID(vmId) - if err != nil { - return errors.Wrapf(err, "failed to query status for vm %s", vmId) +func isK8sNodeReady(node v1.Node) bool { + for _, condition := range node.Status.Conditions { + if condition.Type == v1.NodeReady && condition.Status != v1.ConditionTrue { + return false } - if vm.State == StateRunning { - return nil - } - - return errors.Errorf("VM %s is not running", vmId) - }, waitRetryOpts...) + } + return true } -func waitForHostToBeUp(client *cloudstack.CloudStackClient, hostId string) error { - return retry.Do(func() error { - host, _, err := client.Host.GetHostByID(hostId) - if err != nil { - return errors.Wrapf(err, "failed to query status for host %s", hostId) +func contains(names []string, name string) bool { + for _, n := range names { + if n == name { + return true } - if host.State == StateUp { - return nil - } - return errors.Errorf("host %s is not up", hostId) - }, waitRetryOpts...) + } + return false } -func (impl *Impl) getK8sNodesWhenReady(ctx context.Context) ([]v1.Node, error) { + +func (impl *Impl) getK8sNodes(ctx context.Context, names []string) ([]v1.Node, error) { return retry.DoWithData(func() ([]v1.Node, error) { nodeList := v1.NodeList{} err := impl.List(ctx, &nodeList) if err != nil { return nil, errors.Wrap(err, "failed to list nodes") } - unreadyNodes := []string{} + matchingNodes := []v1.Node{} for _, node := range nodeList.Items { - for _, condition := range node.Status.Conditions { - if condition.Type == v1.NodeReady && condition.Status != v1.ConditionTrue { - unreadyNodes = append(unreadyNodes, node.Name) - break - } + if !contains(names, node.Name) { + continue } + matchingNodes = append(matchingNodes, node) } - if len(unreadyNodes) > 0 { - return nil, errors.Errorf("nodes %s not ready", strings.Join(unreadyNodes, ", ")) + return matchingNodes, nil + }, retryOpts...) +} + +func (impl *Impl) isClusterActive(ctx context.Context, names []string) (bool, error) { + nodes, err := impl.getK8sNodes(ctx, names) + if err != nil { + return false, err + } + for _, node := range nodes { + if node.Spec.Unschedulable { + return false, nil } - return nodeList.Items, nil + } + return true, nil +} - }, waitRetryOpts...) +func (impl *Impl) ensureK8sNodesReady(ctx context.Context, names []string) error { + nodes, err := impl.getK8sNodes(ctx, names) + if err != nil { + return err + } + for _, node := range nodes { + if !isK8sNodeReady(node) { + return errors.Errorf("node %s is not ready", node.Name) + } + } + return nil } -func (impl *Impl) uncordonK8sNodes(ctx context.Context, dryRun bool) error { - impl.Log.Info("Will uncordon ready nodes") - nodes, err := impl.getK8sNodesWhenReady(ctx) +func (impl *Impl) uncordonK8sNodes(ctx context.Context, names []string) error { + nodes, err := impl.getK8sNodes(ctx, names) if err != nil { - impl.Log.Error(err, "Nodes not ready") - return nil + return err } for _, node := range nodes { @@ -241,10 +300,7 @@ func (impl *Impl) uncordonK8sNodes(ctx context.Context, dryRun bool) error { continue } - impl.Log.Info("Uncordon unschedulable node", "node", node.Name, "dryRun", dryRun) - if dryRun { - continue - } + impl.Log.Info("Uncordon unschedulable node", "node", node.Name) err := retry.Do(func() error { nodeItem := v1.Node{} @@ -270,8 +326,36 @@ func (impl *Impl) uncordonK8sNodes(ctx context.Context, dryRun bool) error { return nil } -func (impl *Impl) startVMs(client *cloudstack.CloudStackClient, dryRun bool) error { - impl.Log.Info("Will start stopped VMs") +func (impl *Impl) startHost(client *cloudstack.CloudStackClient, hostName string) error { + host, err := retry.DoWithData(func() (*cloudstack.Host, error) { + host, _, err := client.Host.GetHostByName(hostName) + return host, err + }, retryOpts...) + if err != nil { + return err + } + return retry.Do(func() error { + _, err := client.OutofbandManagement.IssueOutOfBandManagementPowerAction(client.OutofbandManagement.NewIssueOutOfBandManagementPowerActionParams(ActionOn, host.Id)) + return err + }, retryOpts...) +} + +func (impl *Impl) ensureStartedHost(client *cloudstack.CloudStackClient, hostName string) error { + host, err := retry.DoWithData(func() (*cloudstack.Host, error) { + host, _, err := client.Host.GetHostByName(hostName) + return host, err + }, retryOpts...) + if err != nil { + return err + } + + if host.State != StateUp { + return errors.Errorf("host %s is not up", hostName) + } + return nil +} + +func (impl *Impl) startVMs(client *cloudstack.CloudStackClient, names []string) error { params := client.VirtualMachine.NewListVirtualMachinesParams() params.SetState(StateStopped) @@ -279,60 +363,44 @@ func (impl *Impl) startVMs(client *cloudstack.CloudStackClient, dryRun bool) err return client.VirtualMachine.ListVirtualMachines(params) }, retryOpts...) if err != nil { - return err + return errors.Wrap(err, "Failed to list stopped VMs") } - wg := sync.WaitGroup{} for _, vm := range resp.VirtualMachines { - impl.Log.Info("Starting VM", "id", vm.Id, "name", vm.Name, "dryRun", dryRun) - - if dryRun { + if !contains(names, vm.Name) { continue } - wg.Add(1) - - go func(vm *cloudstack.VirtualMachine) { - defer wg.Done() - startParams := client.VirtualMachine.NewStartVirtualMachineParams(vm.Id) - startParams.SetConsiderlasthost(true) // try to schedule to the same host - - if retry.Do(func() error { - _, err := client.VirtualMachine.StartVirtualMachine(startParams) - return err - }, retryOpts...); err != nil { - impl.Log.Error(err, "failed to start stopped vm", "name", vm.Name) - } - if err := waitForVmToBeRunning(client, vm.Id); err != nil { - impl.Log.Error(err, "failed to wait for vm to be running", "name", vm.Name) - } else { - impl.Log.Info("Started VM", "id", vm.Id, "name", vm.Name) - } + impl.Log.Info("Starting VM", "id", vm.Id, "name", vm.Name) + + startParams := client.VirtualMachine.NewStartVirtualMachineParams(vm.Id) + startParams.SetConsiderlasthost(true) // try to schedule to the same host - }(vm) + if err := retry.Do(func() error { + _, err := client.VirtualMachine.StartVirtualMachine(startParams) + return err + }, retryOpts...); err != nil { + return errors.Wrapf(err, "failed to start stopped vm %s", vm.Name) + } + impl.Log.Info("Started VM", "id", vm.Id, "name", vm.Name) } - wg.Wait() return nil } -func (impl *Impl) destroyStuckSystemVMs(client *cloudstack.CloudStackClient, dryRun bool) error { - impl.Log.Info("Will destroy stuck system VMs") +func (impl *Impl) destroyStuckSystemVMs(client *cloudstack.CloudStackClient) error { params := client.SystemVM.NewListSystemVmsParams() params.SetState(StateStopped) resp, err := retry.DoWithData(func() (*cloudstack.ListSystemVmsResponse, error) { return client.SystemVM.ListSystemVms(params) - }) + }, retryOpts...) if err != nil { - return err + return errors.Wrap(err, "Failed to list system VMs") } for _, vm := range resp.SystemVms { - impl.Log.Info("Destroying system VM", "id", vm.Id, "name", vm.Name, "dryRun", dryRun) - if dryRun { - continue - } + impl.Log.Info("Destroying system VM", "id", vm.Id, "name", vm.Name) params := client.SystemVM.NewDestroySystemVmParams(vm.Id) err := retry.Do(func() error { @@ -341,11 +409,16 @@ func (impl *Impl) destroyStuckSystemVMs(client *cloudstack.CloudStackClient, dry }, retryOpts...) if err != nil { - impl.Log.Error(err, "Failed to destroy system vm", "id", vm.Id, "name", vm.Name) - } else { - impl.Log.Info("Destroyed system VM", "id", vm.Id, "name", vm.Name) + return errors.Wrapf(err, "failed to destroy system vm %s", vm.Name) } + impl.Log.Info("Destroyed system VM", "id", vm.Id, "name", vm.Name) } return nil } + +func randomHost(hosts []*cloudstack.Host) *cloudstack.Host { + i, _ := rand.Int(rand.Reader, big.NewInt(int64(len(hosts)))) + + return hosts[i.Int64()] +} diff --git a/helm/chaos-mesh/crds/chaos-mesh.org_cloudstackhostchaos.yaml b/helm/chaos-mesh/crds/chaos-mesh.org_cloudstackhostchaos.yaml index ab46f6eff1..10bbd51136 100644 --- a/helm/chaos-mesh/crds/chaos-mesh.org_cloudstackhostchaos.yaml +++ b/helm/chaos-mesh/crds/chaos-mesh.org_cloudstackhostchaos.yaml @@ -120,6 +120,18 @@ spec: status: description: CloudStackHostChaosStatus represents the status of a CloudStackChaos. properties: + affectedHosts: + additionalProperties: + properties: + name: + type: string + vms: + items: + type: string + type: array + type: object + description: Instances keeps track of the affected hosts and vms + type: object conditions: description: Conditions represents the current global condition of the chaos diff --git a/manifests/crd.yaml b/manifests/crd.yaml index 21787a40fc..a36262722c 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -1214,6 +1214,18 @@ spec: status: description: CloudStackHostChaosStatus represents the status of a CloudStackChaos. properties: + affectedHosts: + additionalProperties: + properties: + name: + type: string + vms: + items: + type: string + type: array + type: object + description: Instances keeps track of the affected hosts and vms + type: object conditions: description: Conditions represents the current global condition of the chaos