@@ -270,33 +270,27 @@ def check_outputs(output_list,
270270 sys .exit (1 )
271271
272272
273- def print_benchmark_result (result ,
274- task = "speed" ,
275- log_level = 0 ,
276- config_params = None ):
277- assert isinstance (result , dict ), "Input result should be a dict."
def _print_runtime(log_level, runtimes, walltimes):
    """Print per-iteration runtime and walltime lines, filtered by log level.

    Args:
        log_level: Controls how many iterations are printed.
            0 prints nothing; 1 prints only the first 10 and last 10
            iterations when there are more than 20 of them; every other
            case prints all iterations.
        runtimes: Indexable collection of per-iteration runtimes, or None.
            When None, nothing is printed.
        walltimes: Indexable collection of per-iteration walltimes, indexed
            in step with ``runtimes``, or None. When None, the walltime is
            reported as 0.
    """
    if runtimes is None:
        return

    count = len(runtimes)

    # Half-open interval [skip_from, skip_to) of iterations that are NOT
    # printed. An empty interval (0, 0) therefore means "print everything".
    if log_level == 0:
        skip_from, skip_to = 0, count
    elif log_level == 1 and count > 20:
        skip_from, skip_to = 10, count - 10
    else:
        skip_from, skip_to = 0, 0

    for idx in range(count):
        if skip_from <= idx < skip_to:
            continue
        wall = 0 if walltimes is None else walltimes[idx]
        print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
              (idx, runtimes[idx], wall))
285289
286- scheduling_times = result .get ("scheduling_times" , "{}" )
287- if task == "scheduling" and scheduling_times is not None :
288- status ["scheduling" ] = eval (scheduling_times )
289290
290- runtimes = result . get ( "total" , None )
291+ def _compute_average_runtime ( runtimes , walltimes ):
291292 if runtimes is None :
292- status ["parameters" ] = config_params
293- print (json .dumps (status ))
294- return
295-
296- walltimes = result .get ("wall_time" , None )
297- gpu_time = result .get ("gpu_time" , None )
298- stable = result .get ("stable" , None )
299- diff = result .get ("diff" , None )
293+ return 0 , 0 , 0 , 0
300294
301295 repeat = len (runtimes )
302296 for i in range (repeat ):
@@ -320,47 +314,70 @@ def print_benchmark_result(result,
320314 avg_walltime = np .average (np .sort (walltimes )[begin :end ])
321315 else :
322316 avg_walltime = 0
317+ return begin , end , avg_runtime , avg_walltime
323318
324- # print all times
325- seg_range = [0 , 0 ]
326- if log_level == 0 :
327- seg_range = [0 , repeat ]
328- elif log_level == 1 and repeat > 20 :
329- seg_range = [10 , repeat - 10 ]
330- for i in range (len (runtimes )):
331- if i < seg_range [0 ] or i >= seg_range [1 ]:
332- walltime = walltimes [i ] if walltimes is not None else 0
333- print ("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
334- (i , runtimes [i ], walltime ))
335319
336- if avg_runtime - avg_walltime > 0.001 :
337- total = avg_runtime - avg_walltime
338- else :
339- print (
340- "Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
341- % (avg_runtime , avg_walltime ))
342- total = 0.001
def print_benchmark_result(result,
                           task="speed",
                           log_level=0,
                           config_params=None):
    """Summarize a benchmark result dict and print it as one JSON line.

    Args:
        result: Dict holding the raw benchmark output. The keys
            "framework", "version", "name", "device" and "backward" are
            required. Optional keys read here are "scheduling_times",
            "stable", "diff", "total", "wall_time", "gpu_time", "repeat",
            "flop" and "byte".
        task: Selects which section is emitted. "scheduling" prints the
            evaluated scheduling times and returns early; "speed" adds a
            "speed" section; any other value emits only the common fields
            (plus "precision" when available).
        log_level: Forwarded to ``_print_runtime`` to control how many
            per-iteration lines are printed for the "speed" task.
        config_params: Stored unchanged under the "parameters" key.

    Returns:
        None. The summary is written to stdout via ``print``.
    """
    assert isinstance(result, dict), "Input result should be a dict."

    status = collections.OrderedDict()
    for field in ("framework", "version", "name", "device", "backward"):
        status[field] = result[field]

    scheduling_times = result.get("scheduling_times", "{}")
    if task == "scheduling" and scheduling_times is not None:
        # NOTE(review): eval() executes whatever expression the string
        # holds. Confirm "scheduling_times" only ever comes from a trusted
        # producer, or consider ast.literal_eval if it is always a literal.
        status["scheduling"] = eval(scheduling_times)
        status["parameters"] = config_params
        print(json.dumps(status))
        return

    stable = result.get("stable", None)
    diff = result.get("diff", None)
    if stable is not None and diff is not None:
        status["precision"] = collections.OrderedDict(
            [("stable", stable), ("diff", diff)])

    if task == "speed":
        runtimes = result.get("total", None)
        walltimes = result.get("wall_time", None)
        gpu_time = result.get("gpu_time", None)

        # Without per-iteration runtimes, fall back to the reported repeat
        # count (1 when that is missing too).
        if runtimes is not None:
            repeat = len(runtimes)
        else:
            repeat = result.get("repeat", 1)

        begin, end, avg_runtime, avg_walltime = _compute_average_runtime(
            runtimes, walltimes)
        _print_runtime(log_level, runtimes, walltimes)

        # "total" is the runtime with the walltime subtracted, floored at
        # 0.001 when the difference is not larger than that.
        net_runtime = avg_runtime - avg_walltime
        if net_runtime > 0.001:
            total = net_runtime
        else:
            print(
                "Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
                % (avg_runtime, avg_walltime))
            total = 0.001

        speed = collections.OrderedDict([
            ("repeat", repeat),
            ("begin", begin),
            ("end", end),
            ("total", total),
            ("wall_time", avg_walltime),
            ("total_include_wall_time", avg_runtime),
        ])
        status["speed"] = speed

        if gpu_time is not None:
            avg_gpu_time = gpu_time / repeat
            speed["gpu_time"] = avg_gpu_time

            flop = result.get("flop", None)
            byte = result.get("byte", None)
            # Throughput figures are skipped when the average GPU time is
            # too close to zero to divide by.
            if flop is not None and abs(avg_gpu_time) > 1E-6:
                speed["gflops"] = float(flop) * 1E-6 / avg_gpu_time
            if byte is not None and abs(avg_gpu_time) > 1E-6:
                speed["gbs"] = float(byte) * 1E-6 / avg_gpu_time

    status["parameters"] = config_params
    print(json.dumps(status))
0 commit comments