Skip to content

Commit d8aafc8

Browse files
committed
AFID
1 parent 6b70f84 commit d8aafc8

File tree

4 files changed

+92
-4
lines changed

4 files changed

+92
-4
lines changed

nodescraper/plugins/inband/amdsmi/amdsmi_collector.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,11 @@
6767
StaticXgmiPlpd,
6868
ValueUnit,
6969
)
70+
from nodescraper.plugins.inband.amdsmi.collector_args import AmdSmiCollectorArgs
7071
from nodescraper.utils import get_exception_traceback
7172

7273

73-
class AmdSmiCollector(InBandDataCollector[AmdSmiDataModel, None]):
74+
class AmdSmiCollector(InBandDataCollector[AmdSmiDataModel, AmdSmiCollectorArgs]):
7475
"""Class for collection of inband tool amd-smi data."""
7576

7677
AMD_SMI_EXE = "amd-smi"
@@ -87,6 +88,7 @@ class AmdSmiCollector(InBandDataCollector[AmdSmiDataModel, None]):
8788
CMD_STATIC = "static -g all --json"
8889
CMD_STATIC_GPU = "static -g {gpu_id} --json"
8990
CMD_RAS = "ras --cper --folder={folder}"
91+
CMD_RAS_AFID = "ras --afid --cper-file {cper_file}"
9092

9193
def _check_amdsmi_installed(self) -> bool:
9294
"""Check if amd-smi is installed
@@ -1266,14 +1268,57 @@ def get_cper_data(self) -> List[FileModel]:
12661268
)
12671269
return []
12681270

1271+
def _get_cper_afid(self, cper_file_path: str) -> Optional[int]:
    """Get the AFID for a CPER file.

    Formats ``CMD_RAS_AFID`` with the given path, hands the resulting
    command to ``_run_amd_smi`` and parses the returned text as a base-10
    integer. Every outcome (no result, parse failure, success) is recorded
    through ``_log_event``.

    Args:
        cper_file_path (str): Path to the CPER file. The path is quoted with
            ``shlex.quote`` before being placed in the command so a path
            containing whitespace or shell metacharacters stays a single
            argument; plain paths are passed through unchanged.

    Returns:
        Optional[int]: AFID value, or None when ``_run_amd_smi`` returned
            None or its output could not be parsed as an integer.
    """
    # Function-scope import so the block does not depend on a file-level import.
    import shlex

    # NOTE(review): assumes _run_amd_smi executes the command string through a
    # POSIX shell (or shlex-style splitting) -- confirm against its implementation.
    cmd = self.CMD_RAS_AFID.format(cper_file=shlex.quote(cper_file_path))
    result = self._run_amd_smi(cmd)

    if result is None:
        self._log_event(
            category=EventCategory.APPLICATION,
            description=f"Failed to get AFID from CPER file: {cper_file_path}",
            priority=EventPriority.ERROR,
            console_log=True,
        )
        return None

    # Keep the try body limited to the conversion so that an unrelated
    # ValueError raised while logging is not misreported as a parse failure.
    try:
        afid = int(result.strip())
    except ValueError:
        self._log_event(
            category=EventCategory.APPLICATION,
            description=f"Failed to parse AFID value from output: {result}",
            data={"output": result, "cper_file": cper_file_path},
            priority=EventPriority.ERROR,
            console_log=True,
        )
        return None

    self._log_event(
        category=EventCategory.APPLICATION,
        description=f"Successfully retrieved AFID from CPER file: {cper_file_path}",
        data={"afid": afid, "cper_file": cper_file_path},
        priority=EventPriority.INFO,
        console_log=True,
    )
    return afid
1311+
12691312
def collect_data(
12701313
self,
1271-
args: Any = None,
1314+
args: Optional[AmdSmiCollectorArgs] = None,
12721315
) -> tuple[TaskResult, Optional[AmdSmiDataModel]]:
12731316
"""Collect AmdSmi data from system
12741317
12751318
Args:
1276-
args (Any, optional): optional arguments for data collection. Defaults to None.
1319+
args (Optional[AmdSmiCollectorArgs], optional): optional arguments for data collection.
1320+
If cper_file_path is provided, will run amd-smi ras --afid --cper-file command.
1321+
Defaults to None.
12771322
12781323
Returns:
12791324
tuple[TaskResult, Optional[AmdSmiDataModel]]: task result and collected data model
@@ -1300,6 +1345,11 @@ def collect_data(
13001345
if amd_smi_data is None:
13011346
return self.result, None
13021347

1348+
# If cper_file_path is provided, get AFID from the CPER file
1349+
if args and args.cper_file_path:
1350+
afid = self._get_cper_afid(args.cper_file_path)
1351+
amd_smi_data.cper_afid = afid
1352+
13031353
return self.result, amd_smi_data
13041354
except Exception as e:
13051355
self._log_event(

nodescraper/plugins/inband/amdsmi/amdsmi_plugin.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,18 @@
2929
from .amdsmi_collector import AmdSmiCollector
3030
from .amdsmidata import AmdSmiDataModel
3131
from .analyzer_args import AmdSmiAnalyzerArgs
32+
from .collector_args import AmdSmiCollectorArgs
3233

3334

34-
class AmdSmiPlugin(InBandDataPlugin[AmdSmiDataModel, None, AmdSmiAnalyzerArgs]):
35+
class AmdSmiPlugin(InBandDataPlugin[AmdSmiDataModel, AmdSmiCollectorArgs, AmdSmiAnalyzerArgs]):
3536
"""Plugin for collection and analysis of amdsmi data"""
3637

3738
DATA_MODEL = AmdSmiDataModel
3839

3940
COLLECTOR = AmdSmiCollector
4041

42+
COLLECTOR_ARGS = AmdSmiCollectorArgs
43+
4144
ANALYZER = AmdSmiAnalyzer
4245

4346
ANALYZER_ARGS = AmdSmiAnalyzerArgs

nodescraper/plugins/inband/amdsmi/amdsmidata.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,7 @@ class AmdSmiDataModel(DataModel):
954954
xgmi_metric: Optional[list[XgmiMetrics]] = Field(default_factory=list)
955955
xgmi_link: Optional[list[XgmiLinks]] = Field(default_factory=list)
956956
cper_data: Optional[list[FileModel]] = Field(default_factory=list)
957+
cper_afid: Optional[int] = None
957958
amdsmitst_data: AmdSmiTstData = Field(default_factory=AmdSmiTstData)
958959

959960
def get_list(self, gpu: int) -> Optional[AmdSmiListItem]:
nodescraper/plugins/inband/amdsmi/collector_args.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2026 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from typing import Optional
27+
28+
from nodescraper.models import CollectorArgs
29+
30+
31+
class AmdSmiCollectorArgs(CollectorArgs):
    """Arguments accepted by the amd-smi collector of AmdSmiPlugin.

    Attributes:
        cper_file_path (Optional[str]): Path to a CPER file. Defaults to
            None, meaning no CPER file was supplied.
    """

    # Optional CPER file path; left as None when the caller does not provide one.
    cper_file_path: Optional[str] = None

0 commit comments

Comments
 (0)