Skip to content

Commit c886359

Browse files
author
wangshuai
committed
feat: add device filtering for GPU selection
Signed-off-by: wangshuai <1090646861@qq.com>
1 parent 2bf6dfe commit c886359

File tree

3 files changed

+58
-0
lines changed

3 files changed

+58
-0
lines changed

doc/config.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,8 @@ String type, `hami-core` for using hami-core for container resource limitation,
5656
Integer type, device memory oversubscription on that node
5757
* `devicecorescaling`:
5858
Integer type, device core oversubscription on that node
59+
* `devicesplitcount`: Allowed number of tasks sharing a device.
60+
* `filterdevices`: Devices to exclude from registration with HAMi.
61+
* `uuid`: UUIDs of devices to ignore.
62+
* `index`: Indexes of devices to ignore.
63+
* A device is ignored by HAMi if its UUID appears in the `uuid` list or its index appears in the `index` list.

pkg/plugin/vgpu/config/config.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,48 @@ type DevicePluginConfigs struct {
146146
FilterDevice *FilterDevice `json:"filterdevices"`
147147
} `json:"nodeconfig"`
148148
}
149+
150+
// Package-level state backing the device filter. The two sets start out nil
// and are populated by initFilter, which FilterDeviceToRegister runs through
// filterOnce.
var (
	// filterOnce is used to run initFilter at most once.
	filterOnce sync.Once
	// uuidMap is the set of device UUIDs to filter out.
	uuidMap map[string]struct{}
	// indexMap is the set of device indexes to filter out.
	indexMap map[uint]struct{}
)
155+
156+
func FilterDeviceToRegister(uuid string, index int) bool {
157+
filterOnce.Do(initFilter)
158+
if len(uuidMap) == 0 && len(indexMap) == 0 {
159+
return false
160+
}
161+
162+
if _, ok := uuidMap[uuid]; ok {
163+
return true
164+
}
165+
166+
if _, ok := indexMap[uint(index)]; ok {
167+
return true
168+
}
169+
170+
return false
171+
}
172+
173+
func initFilter() {
174+
uuidMap = make(map[string]struct{})
175+
indexMap = make(map[uint]struct{})
176+
if DevicePluginFilterDevice == nil {
177+
return
178+
}
179+
180+
if len(DevicePluginFilterDevice.UUID) > 0 {
181+
uuidMap = make(map[string]struct{}, len(DevicePluginFilterDevice.UUID))
182+
for _, u := range DevicePluginFilterDevice.UUID {
183+
uuidMap[u] = struct{}{}
184+
}
185+
}
186+
187+
if len(DevicePluginFilterDevice.Index) > 0 {
188+
indexMap = make(map[uint]struct{}, len(DevicePluginFilterDevice.Index))
189+
for _, idx := range DevicePluginFilterDevice.Index {
190+
indexMap[idx] = struct{}{}
191+
}
192+
}
193+
}

pkg/plugin/vgpu/nvidia.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,14 @@ func (g *GpuDeviceManager) Devices() []*Device {
9696
d, ret := config.Nvml().DeviceGetHandleByIndex(i)
9797
check(ret)
9898

99+
uuid, ret := d.GetUUID()
100+
check(ret)
101+
// Filter GPU device
102+
if config.FilterDeviceToRegister(uuid, i) {
103+
klog.V(5).Infof("Filtering out GPU device index=%d, uuid=%s", i, uuid)
104+
continue
105+
}
106+
99107
migMode, _, ret := d.GetMigMode()
100108
if ret != nvml.SUCCESS {
101109
if ret == nvml.ERROR_NOT_SUPPORTED {

0 commit comments

Comments
 (0)