@@ -1313,14 +1313,17 @@ def crawl_gpu(self):
13131313 nvidia-smi --query-gpu=utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv,noheader,nounits
13141314 '''
13151315
1316- util_atttibutes = ['gpu' ,'memory' ]
1317- memory_atttibutes = ['total' ,'free' ,'used' ]
1318-
13191316 if not os .path .exists (NVIDIA_SMI ):
1320- return
1317+ return
1318+
1319+ params = ['utilization.gpu' , 'utilization.memory' , 'memory.total' ,
1320+ 'memory.free' , 'memory.used' , 'temperature.gpu' , 'power.draw' ,
1321+ 'power.limit' ]
13211322
1322- nvidia_smi_proc = subprocess .Popen ([NVIDIA_SMI , '--query-gpu=utilization.gpu,utilization.memory,memory.total,memory.free,memory.used' ,
1323- '--format=csv,noheader,nounits' ], stdout = subprocess .PIPE , stderr = subprocess .PIPE )
1323+ nvidia_smi_proc = subprocess .Popen ([NVIDIA_SMI ,
1324+ '--query-gpu={}' .format (',' .join (params )),
1325+ '--format=csv,noheader,nounits' ],
1326+ stdout = subprocess .PIPE , stderr = subprocess .PIPE )
13241327 nvidia_smi_proc_out , nvidia_smi_proc_err = nvidia_smi_proc .communicate ()
13251328
13261329 if nvidia_smi_proc .returncode > 0 :
@@ -1330,11 +1333,13 @@ def crawl_gpu(self):
13301333 for i , val_str in enumerate (metrics ):
13311334 if len (val_str ) != 0 :
13321335 values = val_str .split (',' )
1333- entry = {'utilization' :{'gpu' : values [0 ], 'memory' : values [1 ]},
1334- 'memory' : {'total' :values [2 ], 'free' : values [3 ], 'used' : values [4 ]}}
1336+ entry = {'utilization' :{'gpu' : values [0 ], 'memory' : values [1 ]},
1337+ 'memory' : {'total' :values [2 ], 'free' : values [3 ], 'used' : values [4 ]},
1338+ 'temperature' :values [5 ],
1339+ 'power' :{'draw' : values [6 ], 'limit' :values [7 ]}
1340+ }
13351341 key = 'gpu{}' .format (i )
13361342 yield (key , entry )
1337-
13381343 return
13391344
13401345 def _crawl_wrapper (self , _function , namespaces = ALL_NAMESPACES , * args ):
0 commit comments