6767 StaticXgmiPlpd ,
6868 ValueUnit ,
6969)
70+ from nodescraper .plugins .inband .amdsmi .collector_args import AmdSmiCollectorArgs
7071from nodescraper .utils import get_exception_traceback
7172
7273
73- class AmdSmiCollector (InBandDataCollector [AmdSmiDataModel , None ]):
74+ class AmdSmiCollector (InBandDataCollector [AmdSmiDataModel , AmdSmiCollectorArgs ]):
7475 """Class for collection of inband tool amd-smi data."""
7576
7677 AMD_SMI_EXE = "amd-smi"
@@ -87,6 +88,7 @@ class AmdSmiCollector(InBandDataCollector[AmdSmiDataModel, None]):
8788 CMD_STATIC = "static -g all --json"
8889 CMD_STATIC_GPU = "static -g {gpu_id} --json"
8990 CMD_RAS = "ras --cper --folder={folder}"
91+ CMD_RAS_AFID = "ras --afid --cper-file {cper_file}"
9092
9193 def _check_amdsmi_installed (self ) -> bool :
9294 """Check if amd-smi is installed
@@ -1266,14 +1268,57 @@ def get_cper_data(self) -> List[FileModel]:
12661268 )
12671269 return []
12681270
def _get_cper_afid(self, cper_file_path: str) -> Optional[int]:
    """Get AFID from a CPER file.

    Formats ``CMD_RAS_AFID`` with the given path, runs it through
    ``_run_amd_smi`` and parses the stripped output as an integer. Every
    outcome (no result, parsed value, unparsable output) is reported via
    ``_log_event``.

    Args:
        cper_file_path (str): Path to the CPER file

    Returns:
        Optional[int]: AFID value, or None when the command produced no
        result or its output could not be parsed as an integer
    """
    import shlex

    # Quote the path so that whitespace or shell metacharacters in it are
    # passed to amd-smi as a single argument instead of being split or
    # interpreted. Paths made only of safe characters are left unchanged.
    # NOTE(review): assumes _run_amd_smi hands the command string to a shell
    # (or a shlex-style splitter) -- confirm against its implementation.
    cmd = self.CMD_RAS_AFID.format(cper_file=shlex.quote(cper_file_path))
    result = self._run_amd_smi(cmd)

    if result is None:
        self._log_event(
            category=EventCategory.APPLICATION,
            description=f"Failed to get AFID from CPER file: {cper_file_path}",
            priority=EventPriority.ERROR,
            console_log=True,
        )
        return None

    # Keep the try body limited to the conversion that can raise ValueError.
    try:
        afid = int(result.strip())
    except ValueError:
        self._log_event(
            category=EventCategory.APPLICATION,
            description=f"Failed to parse AFID value from output: {result}",
            data={"output": result, "cper_file": cper_file_path},
            priority=EventPriority.ERROR,
            console_log=True,
        )
        return None
    else:
        self._log_event(
            category=EventCategory.APPLICATION,
            description=f"Successfully retrieved AFID from CPER file: {cper_file_path}",
            data={"afid": afid, "cper_file": cper_file_path},
            priority=EventPriority.INFO,
            console_log=True,
        )
        return afid
1311+
12691312 def collect_data (
12701313 self ,
1271- args : Any = None ,
1314+ args : Optional [ AmdSmiCollectorArgs ] = None ,
12721315 ) -> tuple [TaskResult , Optional [AmdSmiDataModel ]]:
12731316 """Collect AmdSmi data from system
12741317
12751318 Args:
1276- args (Any, optional): optional arguments for data collection. Defaults to None.
1319+ args (Optional[AmdSmiCollectorArgs], optional): optional arguments for data collection.
1320+ If cper_file_path is provided, will run amd-smi ras --afid --cper-file command.
1321+ Defaults to None.
12771322
12781323 Returns:
12791324 tuple[TaskResult, Optional[AmdSmiDataModel]]: task result and collected data model
@@ -1300,6 +1345,11 @@ def collect_data(
13001345 if amd_smi_data is None :
13011346 return self .result , None
13021347
1348+ # If cper_file_path is provided, get AFID from the CPER file
1349+ if args and args .cper_file_path :
1350+ afid = self ._get_cper_afid (args .cper_file_path )
1351+ amd_smi_data .cper_afid = afid
1352+
13031353 return self .result , amd_smi_data
13041354 except Exception as e :
13051355 self ._log_event (
0 commit comments