diff --git a/server/pkg/api/server_add_instance.go b/server/pkg/api/server_add_instance.go index ae5b69f..e6f38e4 100644 --- a/server/pkg/api/server_add_instance.go +++ b/server/pkg/api/server_add_instance.go @@ -86,6 +86,7 @@ func (s *ShoesLXDMultiServer) AddInstance(ctx context.Context, req *pb.AddInstan if err != nil { return nil, status.Errorf(codes.Internal, "failed to retrieve instance information: %+v", err) } + log.Printf("Success AddInstance host: %s, runnerName: %s\n", host.HostConfig.LxdHost, i.Name) return &pb.AddInstanceResponse{ CloudId: i.Name, @@ -103,7 +104,7 @@ func (s *ShoesLXDMultiServer) setLXDStatusCache(targetLXDHosts []lxdclient.LXDHo if err != nil { return nil, api.InstancesPost{}, status.Errorf(codes.InvalidArgument, "failed to schedule host: %+v", err) } - log.Printf("AddInstance scheduled host: %s\n", host.HostConfig.LxdHost) + log.Printf("AddInstance scheduled host: %s, runnerName: %s\n", host.HostConfig.LxdHost, instanceName) reqInstance := api.InstancesPost{ InstancePut: api.InstancePut{ diff --git a/server/pkg/metric/scrape_lxd.go b/server/pkg/metric/scrape_lxd.go index 2956452..8dddce7 100644 --- a/server/pkg/metric/scrape_lxd.go +++ b/server/pkg/metric/scrape_lxd.go @@ -3,7 +3,9 @@ package metric import ( "context" "fmt" + "log" "strconv" + "sync" "github.com/docker/go-units" @@ -58,68 +60,84 @@ func (ScraperLXD) Help() string { // Scrape scrape metrics func (ScraperLXD) Scrape(ctx context.Context, hostConfigs []config.HostConfig, ch chan<- prometheus.Metric) error { - if err := scrapeLXDHost(ctx, hostConfigs, ch); err != nil { + if err := scrapeLXDHosts(ctx, hostConfigs, ch); err != nil { return fmt.Errorf("failed to scrape LXD host: %w", err) } return nil } -func scrapeLXDHost(ctx context.Context, hostConfigs []config.HostConfig, ch chan<- prometheus.Metric) error { +func scrapeLXDHosts(ctx context.Context, hostConfigs []config.HostConfig, ch chan<- prometheus.Metric) error { hosts, err := lxdclient.ConnectLXDs(hostConfigs) if err != nil { return fmt.Errorf("failed to connect LXD hosts: %w", err) } + wg := sync.WaitGroup{} + for _, host := range hosts { - allCPU, allMemory, hostname, err := lxdclient.ScrapeLXDHostResources(host.Client) - if err != nil { - return fmt.Errorf("failed to scrape lxd resources: %w", err) - } + wg.Add(1) + host := host + go func(host lxdclient.LXDHost) { + defer wg.Done() - ch <- prometheus.MustNewConstMetric( - lxdHostMaxCPU, prometheus.GaugeValue, float64(allCPU), hostname) - ch <- prometheus.MustNewConstMetric( - lxdHostMaxMemory, prometheus.GaugeValue, float64(allMemory), hostname) + if err := scrapeLXDHost(host, ch); err != nil { + log.Printf("failed to scrape LXD host: %s, %s\n", host.HostConfig.LxdHost, err) + } + }(host) + } + wg.Wait() + return nil +} - instances, err := lxdclient.GetAnyInstances(host.Client) - if err != nil { - return fmt.Errorf("failed to retrieve list of instance (host: %s): %w", hostname, err) - } +func scrapeLXDHost(host lxdclient.LXDHost, ch chan<- prometheus.Metric) error { + allCPU, allMemory, hostname, err := lxdclient.ScrapeLXDHostResources(host.Client) + if err != nil { + return fmt.Errorf("failed to scrape lxd resources: %w", err) + } - for _, instance := range instances { - memory, err := units.FromHumanSize(instance.Config["limits.memory"]) - if err != nil { - return fmt.Errorf("failed to convert limits.memory: %w", err) - } + ch <- prometheus.MustNewConstMetric( + lxdHostMaxCPU, prometheus.GaugeValue, float64(allCPU), hostname) + ch <- prometheus.MustNewConstMetric( + lxdHostMaxMemory, prometheus.GaugeValue, float64(allMemory), hostname) - ch <- prometheus.MustNewConstMetric( - lxdInstance, prometheus.GaugeValue, 1, - instance.Name, hostname, instance.Config["limits.cpu"], strconv.FormatInt(memory, 10), - ) - } + instances, err := lxdclient.GetAnyInstances(host.Client) + if err != nil { + return fmt.Errorf("failed to retrieve list of instance (host: %s): %w", hostname, err) + } - allocatedCPU, allocatedMemory, err := lxdclient.ScrapeLXDHostAllocatedResources(instances) + for _, instance := range instances { + memory, err := units.FromHumanSize(instance.Config["limits.memory"]) if err != nil { - return fmt.Errorf("failed to scrape instance info: %w", err) + return fmt.Errorf("failed to convert limits.memory: %w", err) } + ch <- prometheus.MustNewConstMetric( - lxdUsageCPU, prometheus.GaugeValue, float64(allocatedCPU), hostname) - ch <- prometheus.MustNewConstMetric( - lxdUsageMemory, prometheus.GaugeValue, float64(allocatedMemory), hostname) - - s := lxdclient.LXDStatus{ - Resource: lxdclient.Resource{ - CPUTotal: allCPU, - MemoryTotal: allMemory, - CPUUsed: allocatedCPU, - MemoryUsed: allocatedMemory, - }, - HostConfig: host.HostConfig, - } - if err := lxdclient.SetStatusCache(host.HostConfig.LxdHost, s); err != nil { - return fmt.Errorf("failed to set status cache: %w", err) - } + lxdInstance, prometheus.GaugeValue, 1, + instance.Name, hostname, instance.Config["limits.cpu"], strconv.FormatInt(memory, 10), + ) + } + + allocatedCPU, allocatedMemory, err := lxdclient.ScrapeLXDHostAllocatedResources(instances) + if err != nil { + return fmt.Errorf("failed to scrape instance info: %w", err) + } + ch <- prometheus.MustNewConstMetric( + lxdUsageCPU, prometheus.GaugeValue, float64(allocatedCPU), hostname) + ch <- prometheus.MustNewConstMetric( + lxdUsageMemory, prometheus.GaugeValue, float64(allocatedMemory), hostname) + + s := lxdclient.LXDStatus{ + Resource: lxdclient.Resource{ + CPUTotal: allCPU, + MemoryTotal: allMemory, + CPUUsed: allocatedCPU, + MemoryUsed: allocatedMemory, + }, + HostConfig: host.HostConfig, + } + if err := lxdclient.SetStatusCache(host.HostConfig.LxdHost, s); err != nil { + return fmt.Errorf("failed to set status cache: %w", err) } return nil