Skip to content

Commit

Permalink
Adding Unit test and fixing typos
Browse files Browse the repository at this point in the history
Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>
  • Loading branch information
visheshtanksale committed Jun 24, 2024
1 parent 4e2818b commit ac2cb43
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 38 deletions.
16 changes: 5 additions & 11 deletions internal/lm/nvml.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func NewDeviceLabeler(manager resource.Manager, config *spec.Config) (Labeler, e
return nil, fmt.Errorf("error creating resource labeler: %v", err)
}

gpuModeLabeler, err := newGPUModeLabeler(manager)
gpuModeLabeler, err := newGPUModeLabeler(devices)
if err != nil {
return nil, fmt.Errorf("error creating resource labeler: %v", err)
}
Expand Down Expand Up @@ -202,15 +202,9 @@ func isMPSCapable(manager resource.Manager) (bool, error) {
return true, nil
}

func newGPUModeLabeler(manager resource.Manager) (Labeler, error) {
devices, err := manager.GetDevices()
if err != nil {
return nil, err
}
if len(devices) == 0 {
// no devices, return empty labels
return empty{}, nil
}
// newGPUModeLabeler creates a new labeler that reports the mode of GPUs on the node.
// GPUs can be in Graphics or Compute mode.
func newGPUModeLabeler(devices []resource.Device) (Labeler, error) {
classes, err := getDeviceClasses(devices)
if err != nil {
return nil, err
Expand Down Expand Up @@ -244,7 +238,7 @@ func getModeForClasses(classes []uint32) string {
func getDeviceClasses(devices []resource.Device) ([]uint32, error) {
seenClasses := make(map[uint32]bool)
for _, d := range devices {
class, err := d.GetPIEClass()
class, err := d.GetPCIClass()
if err != nil {
return nil, err
}
Expand Down
86 changes: 86 additions & 0 deletions internal/lm/nvml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,89 @@ func TestSharingLabeler(t *testing.T) {
})
}
}

// TestGPUModeLabeler verifies the "nvidia.com/gpu.mode" label produced by
// newGPUModeLabeler for different combinations of device PCI classes.
//
// The PCI class values used here are:
//   - 0x030000: VGA compatible display controller, expected label "graphics"
//   - 0x030200: 3D controller, expected label "compute"
//   - anything else, or a mix of classes, expected label "unknown"
func TestGPUModeLabeler(t *testing.T) {
	testCases := []struct {
		description    string
		devices        []resource.Device
		expectedError  bool
		expectedLabels map[string]string
	}{
		{
			description: "single device with graphics PCI class",
			devices: []resource.Device{
				rt.NewDeviceWithPCIClassMock(0x030000),
			},
			expectedLabels: map[string]string{
				"nvidia.com/gpu.mode": "graphics",
			},
		},
		{
			description: "single device with compute PCI class",
			devices: []resource.Device{
				rt.NewDeviceWithPCIClassMock(0x030200),
			},
			expectedLabels: map[string]string{
				"nvidia.com/gpu.mode": "compute",
			},
		},
		{
			description: "single device with switch PCI class",
			devices: []resource.Device{
				rt.NewDeviceWithPCIClassMock(0x068000),
			},
			expectedLabels: map[string]string{
				"nvidia.com/gpu.mode": "unknown",
			},
		},
		{
			description: "multiple device have same compute PCI class",
			devices: []resource.Device{
				rt.NewDeviceWithPCIClassMock(0x030200),
				rt.NewDeviceWithPCIClassMock(0x030200),
				rt.NewDeviceWithPCIClassMock(0x030200),
			},
			expectedLabels: map[string]string{
				"nvidia.com/gpu.mode": "compute",
			},
		},
		{
			description: "multiple device have same graphics PCI class",
			devices: []resource.Device{
				rt.NewDeviceWithPCIClassMock(0x030000),
				rt.NewDeviceWithPCIClassMock(0x030000),
				rt.NewDeviceWithPCIClassMock(0x030000),
			},
			expectedLabels: map[string]string{
				"nvidia.com/gpu.mode": "graphics",
			},
		},
		{
			description: "multiple device with some with graphics and others with compute PCI class",
			devices: []resource.Device{
				rt.NewDeviceWithPCIClassMock(0x030000),
				rt.NewDeviceWithPCIClassMock(0x030200),
				rt.NewDeviceWithPCIClassMock(0x030000),
			},
			expectedLabels: map[string]string{
				"nvidia.com/gpu.mode": "unknown",
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			gpuModeLabeler, err := newGPUModeLabeler(tc.devices)
			if err != nil {
				// A construction failure is only acceptable for cases that
				// expect an error; never fall through to a nil labeler.
				require.True(t, tc.expectedError, "unexpected error creating GPU mode labeler: %v", err)
				return
			}

			labels, err := gpuModeLabeler.Labels()
			if tc.expectedError {
				require.Error(t, err)
			} else {
				require.NoError(t, err)
			}

			require.EqualValues(t, tc.expectedLabels, labels)
		})
	}
}
2 changes: 1 addition & 1 deletion internal/resource/cuda-device.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,6 @@ func (d *cudaDevice) IsMigEnabled() (bool, error) {
return false, nil
}

func (d *cudaDevice) GetPIEClass() (uint32, error) {
// GetPCIClass always reports a PCI class of 0 together with a nil error;
// the cudaDevice receiver is not consulted.
func (d *cudaDevice) GetPCIClass() (uint32, error) {
return 0, nil
}
42 changes: 21 additions & 21 deletions internal/resource/device_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion internal/resource/nvml-device.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (d nvmlDevice) GetTotalMemoryMB() (uint64, error) {
return info.Total / (1024 * 1024), nil
}

func (d nvmlDevice) GetPIEClass() (uint32, error) {
func (d nvmlDevice) GetPCIClass() (uint32, error) {
pciBusID, err := d.GetPCIBusID()
if err != nil {
return 0, err
Expand Down
3 changes: 2 additions & 1 deletion internal/resource/nvml-mig-device.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,12 @@ func totalMemory(attr map[string]interface{}) (uint64, error) {
}
}

func (d nvmlMigDevice) GetPIEClass() (uint32, error) {
func (d nvmlMigDevice) GetPCIClass() (uint32, error) {
info, retVal := d.MigDevice.GetPciInfo()
if retVal != nvml.SUCCESS {
return 0, retVal
}

var bytes []byte
for _, char := range info.BusId {
if char == 0 {
Expand Down
2 changes: 1 addition & 1 deletion internal/resource/sysfs-device.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,6 @@ func (d vfioDevice) IsMigCapable() (bool, error) {
return false, nil
}

func (d vfioDevice) GetPIEClass() (uint32, error) {
// GetPCIClass returns the Class field of the device's nvidiaPCIDevice.
// The returned error is always nil.
func (d vfioDevice) GetPCIClass() (uint32, error) {
return d.nvidiaPCIDevice.Class, nil
}
9 changes: 8 additions & 1 deletion internal/resource/testing/resource-testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,14 @@ func NewDeviceMock(migEnabled bool) *DeviceMock {
IsMigEnabledFunc: func() (bool, error) { return migEnabled, nil },
IsMigCapableFunc: func() (bool, error) { return migEnabled, nil },
GetMigDevicesFunc: func() ([]resource.Device, error) { return nil, nil },
GetPIEClassFunc: func() (uint32, error) { return 0x030000, nil },
GetPCIClassFunc: func() (uint32, error) { return 0x030000, nil },
}}
return &d
}

// NewDeviceWithPCIClassMock builds a DeviceMock whose GetPCIClassFunc always
// reports the supplied pciClass with a nil error. No other mock function is
// configured on the returned value.
func NewDeviceWithPCIClassMock(pciClass uint32) *DeviceMock {
	reportClass := func() (uint32, error) {
		return pciClass, nil
	}
	return &DeviceMock{resource.DeviceMock{
		GetPCIClassFunc: reportClass,
	}}
}
Expand Down
2 changes: 1 addition & 1 deletion internal/resource/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ type Device interface {
GetTotalMemoryMB() (uint64, error)
GetDeviceHandleFromMigDeviceHandle() (Device, error)
GetCudaComputeCapability() (int, int, error)
GetPIEClass() (uint32, error)
GetPCIClass() (uint32, error)
}

0 comments on commit ac2cb43

Please sign in to comment.