Skip to content

Commit a65f110

Browse files
committed
Add hooks for CAN errors
1 parent 9c97bd1 commit a65f110

File tree

11 files changed

+227
-30
lines changed

11 files changed

+227
-30
lines changed

pycyphal/transport/can/_can.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from __future__ import annotations
66
import copy
7+
import enum
78
import typing
89
import asyncio
910
import logging
@@ -15,6 +16,7 @@
1516
from ._session import CANInputSession, CANOutputSession, SendTransaction
1617
from ._session import BroadcastCANOutputSession, UnicastCANOutputSession
1718
from ._frame import CyphalFrame, TRANSFER_ID_MODULO
19+
from .media import Media
1820
from ._identifier import CANID, generate_filter_configurations
1921
from ._input_dispatch_table import InputDispatchTable
2022
from ._tracer import CANTracer, CANCapture
@@ -84,6 +86,14 @@ class CANTransport(pycyphal.transport.Transport):
8486

8587
TRANSFER_ID_MODULO = TRANSFER_ID_MODULO
8688

89+
class Error(enum.Enum):
90+
"""Transport-specific error codes."""
91+
92+
SEND_TIMEOUT = enum.auto() # Did not send within the specified deadline
93+
94+
ErrorHandler = typing.Callable[[Timestamp, Error | Media.Error], None]
95+
"""The error handler is non-blocking and non-yielding; returns immediately."""
96+
8797
def __init__(
8898
self,
8999
media: Media,
@@ -115,6 +125,8 @@ def __init__(
115125

116126
self._frame_stats = CANTransportStatistics()
117127

128+
self._error_hooks: typing.List[CANTransport.ErrorHandler] = []
129+
118130
if self._local_node_id is not None and not 0 <= self._local_node_id <= CANID.NODE_ID_MASK:
119131
raise ValueError(f"Invalid node ID for CAN: {self._local_node_id}")
120132

@@ -130,7 +142,16 @@ def __init__(
130142
f"The number of acceptance filters is too low: {media.number_of_acceptance_filters}"
131143
)
132144

133-
media.start(self._on_frames_received, no_automatic_retransmission=self._local_node_id is None)
145+
media.start(self._on_frames_received, self._on_error, no_automatic_retransmission=self._local_node_id is None)
146+
147+
def add_error_hook(self, hook: CANTransport.ErrorHandler) -> None:
148+
"""Register an error hook. Called on transport or media error."""
149+
self._error_hooks.append(hook)
150+
151+
def _on_error(self, timestamp: Timestamp, error: CANTransport.Error | Media.Error) -> None:
152+
"""Call all registered hooks on error in media or transport layer."""
153+
for hook in self._error_hooks:
154+
hook(timestamp, error)
134155

135156
@property
136157
def protocol_parameters(self) -> pycyphal.transport.ProtocolParameters:
@@ -157,6 +178,8 @@ def output_sessions(self) -> typing.Sequence[CANOutputSession]:
157178
return list(self._output_registry.values())
158179

159180
def close(self) -> None:
181+
self._error_hooks.clear()
182+
160183
for s in (*self.input_sessions, *self.output_sessions):
161184
try:
162185
s.close()
@@ -373,6 +396,7 @@ async def _do_send(self, t: SendTransaction) -> bool:
373396
len(t.frames),
374397
can_id_int,
375398
)
399+
self._on_error(Timestamp.now(), CANTransport.Error.SEND_TIMEOUT)
376400

377401
return not unsent_frames
378402

pycyphal/transport/can/media/_media.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from __future__ import annotations
66
import abc
7+
import enum
78
import typing
89
import asyncio
910
import warnings
@@ -28,6 +29,21 @@ class Media(abc.ABC):
2829
The timestamp is provided individually per frame.
2930
"""
3031

32+
class Error(enum.Enum):
33+
"""Media-specific error codes."""
34+
35+
CAN_TX_TIMEOUT = enum.auto() # A transmission request timed out
36+
CAN_BUS_OFF = enum.auto() # The CAN controller entered the bus-off state
37+
CAN_RX_OVERFLOW = enum.auto() # Overflow in the CAN controller
38+
CAN_TX_OVERFLOW = enum.auto() # Overflow in the CAN controller
39+
CAN_RX_WARNING = enum.auto() # The CAN controller issued a warning
40+
CAN_TX_WARNING = enum.auto() # The CAN controller issued a warning
41+
CAN_TX_PASSIVE = enum.auto() # The CAN controller entered the error passive state
42+
CAN_RX_PASSIVE = enum.auto() # The CAN controller entered the error passive state
43+
44+
ErrorHandler = typing.Callable[[Timestamp, Error], None]
45+
"""The error handler is non-blocking and non-yielding; returns immediately."""
46+
3147
VALID_MTU_SET = {8, 12, 16, 20, 24, 32, 48, 64}
3248
"""Valid MTU values for Classic CAN and CAN FD."""
3349

@@ -73,7 +89,12 @@ def number_of_acceptance_filters(self) -> int:
7389
raise NotImplementedError
7490

7591
@abc.abstractmethod
76-
def start(self, handler: ReceivedFramesHandler, no_automatic_retransmission: bool) -> None:
92+
def start(
93+
self,
94+
handler: ReceivedFramesHandler,
95+
error_handler: typing.Optional[ErrorHandler],
96+
no_automatic_retransmission: bool,
97+
) -> None:
7798
"""
7899
Every received frame shall be timestamped. Both monotonic and system timestamps are required.
79100
There are no timestamping accuracy requirements. An empty set of frames should never be reported.
@@ -97,6 +118,9 @@ def start(self, handler: ReceivedFramesHandler, no_automatic_retransmission: boo
97118
98119
:param handler: Behold my transformation. You are empowered to do as you please.
99120
121+
:param error_handler: Informs about media errors. This feature is optional in both directions.
122+
Ignore if not implemented. Set to None if error reporting is not needed by the transport.
123+
100124
:param no_automatic_retransmission: If True, the CAN controller should be configured to abort transmission
101125
of CAN frames after first error or arbitration loss (time-triggered transmission mode).
102126
This mode is used by Cyphal to facilitate the PnP node-ID allocation process on the client side.

pycyphal/transport/can/media/candump/_candump.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,12 @@ def mtu(self) -> int:
117117
def number_of_acceptance_filters(self) -> int:
118118
return 1
119119

120-
def start(self, handler: Media.ReceivedFramesHandler, no_automatic_retransmission: bool) -> None:
120+
def start(
121+
self,
122+
handler: Media.ReceivedFramesHandler,
123+
error_handler: None | Media.ErrorHandler,
124+
no_automatic_retransmission: bool,
125+
) -> None:
121126
_ = no_automatic_retransmission
122127
if self._thread is not None:
123128
raise RuntimeError(f"{self!r}: Already started")

pycyphal/transport/can/media/pythoncan/_pythoncan.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,12 @@ def is_fd(self) -> bool:
245245
"""
246246
return self._is_fd
247247

248-
def start(self, handler: Media.ReceivedFramesHandler, no_automatic_retransmission: bool) -> None:
248+
def start(
249+
self,
250+
handler: Media.ReceivedFramesHandler,
251+
error_handler: typing.Optional[Media.ErrorHandler],
252+
no_automatic_retransmission: bool,
253+
) -> None:
249254
self._tx_thread.start()
250255
if self._maybe_thread is None:
251256
self._rx_handler = handler

pycyphal/transport/can/media/socketcan/_socketcan.py

Lines changed: 139 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Author: Pavel Kirienko <pavel@opencyphal.org>
44
# pylint: disable=duplicate-code
55

6+
from dataclasses import dataclass
67
import enum
78
import time
89
import errno
@@ -29,6 +30,14 @@
2930
_logger = logging.getLogger(__name__)
3031

3132

33+
@dataclass
34+
class _TimestampedErrorList:
35+
"""Collection of media errors with a single timestamp. Used as a helper for typing."""
36+
37+
timestamp: Timestamp
38+
errors: typing.List[Media.Error]
39+
40+
3241
class SocketCANMedia(Media):
3342
"""
3443
This media implementation provides a simple interface for the standard Linux SocketCAN media layer.
@@ -125,17 +134,29 @@ def number_of_acceptance_filters(self) -> int:
125134
"""
126135
return 512
127136

128-
def start(self, handler: Media.ReceivedFramesHandler, no_automatic_retransmission: bool) -> None:
137+
def start(
138+
self,
139+
handler: Media.ReceivedFramesHandler,
140+
error_handler: typing.Optional[Media.ErrorHandler],
141+
no_automatic_retransmission: bool,
142+
) -> None:
129143
if self._maybe_thread is None:
130144
self._maybe_thread = threading.Thread(
131-
target=self._thread_function, name=str(self), args=(handler, asyncio.get_event_loop()), daemon=True
145+
target=self._thread_function,
146+
name=str(self),
147+
args=(handler, error_handler, asyncio.get_event_loop()),
148+
daemon=True,
132149
)
133150
self._maybe_thread.start()
134151
if no_automatic_retransmission:
135152
_logger.info("%s non-automatic retransmission is not supported", self)
136153
else:
137154
raise RuntimeError("The RX frame handler is already set up")
138155

156+
if error_handler is not None:
157+
err_mask = _CAN_ERR_TX_TIMEOUT | _CAN_ERR_CRTL | _CAN_ERR_BUSOFF
158+
self._sock.setsockopt(socket.SOL_CAN_RAW, _CAN_RAW_ERR_FILTER, err_mask)
159+
139160
def configure_acceptance_filters(self, configuration: typing.Sequence[FilterConfiguration]) -> None:
140161
if self._closed:
141162
raise pycyphal.transport.ResourceClosedError(repr(self))
@@ -194,14 +215,30 @@ def close(self) -> None:
194215
self._ctl_worker.close()
195216
self._ctl_main.close()
196217

197-
def _thread_function(self, handler: Media.ReceivedFramesHandler, loop: asyncio.AbstractEventLoop) -> None:
218+
def _thread_function(
219+
self,
220+
handler: Media.ReceivedFramesHandler,
221+
error_handler: typing.Optional[Media.ErrorHandler],
222+
loop: asyncio.AbstractEventLoop,
223+
) -> None:
198224
def handler_wrapper(frs: typing.Sequence[typing.Tuple[Timestamp, Envelope]]) -> None:
199225
try:
200226
if not self._closed: # Don't call after closure to prevent race conditions and use-after-close.
201227
handler(frs)
202228
except Exception as exc:
203229
_logger.exception("%s: Unhandled exception in the receive handler: %s; lost frames: %s", self, exc, frs)
204230

231+
def error_handler_wrapper(errors: _TimestampedErrorList) -> None:
232+
try:
233+
# Check if we are not closed and the handler exists
234+
if not self._closed and error_handler is not None:
235+
for error in errors.errors:
236+
error_handler(errors.timestamp, error)
237+
except Exception as exc:
238+
_logger.exception(
239+
"%s: Unhandled exception in the receive error handler: %s; lost error: %s", self, exc, errors
240+
)
241+
205242
while not self._closed and not loop.is_closed():
206243
try:
207244
(
@@ -213,14 +250,22 @@ def handler_wrapper(frs: typing.Sequence[typing.Tuple[Timestamp, Envelope]]) ->
213250

214251
if self._sock in read_ready:
215252
frames: typing.List[typing.Tuple[Timestamp, Envelope]] = []
253+
errors: typing.Optional[_TimestampedErrorList] = None
216254
try:
217255
while True:
218-
frames.append(self._read_frame(ts_mono_ns))
256+
out = self._read_frame(ts_mono_ns)
257+
if isinstance(out, _TimestampedErrorList):
258+
errors = out
259+
break # Report previously received frames first
260+
else:
261+
frames.append(out)
219262
except OSError as ex:
220263
if ex.errno != errno.EAGAIN:
221264
raise
222265
try:
223266
loop.call_soon_threadsafe(handler_wrapper, frames)
267+
if errors:
268+
loop.call_soon_threadsafe(error_handler_wrapper, errors)
224269
except RuntimeError as ex:
225270
_logger.debug("%s: Event loop is closed, exiting: %r", self, ex)
226271
break
@@ -241,7 +286,7 @@ def handler_wrapper(frs: typing.Sequence[typing.Tuple[Timestamp, Envelope]]) ->
241286
self._closed = True
242287
_logger.debug("%s thread is about to exit", self)
243288

244-
def _read_frame(self, ts_mono_ns: int) -> typing.Tuple[Timestamp, Envelope]:
289+
def _read_frame(self, ts_mono_ns: int) -> typing.Tuple[Timestamp, Envelope] | _TimestampedErrorList:
245290
while True:
246291
data, ancdata, msg_flags, _addr = self._sock.recvmsg( # type: ignore
247292
self._native_frame_size, self._ancillary_data_buffer_size
@@ -266,9 +311,13 @@ def _read_frame(self, ts_mono_ns: int) -> typing.Tuple[Timestamp, Envelope]:
266311

267312
assert ts_system_ns > 0, "Missing the timestamp; does the driver support timestamping?"
268313
timestamp = Timestamp(system_ns=ts_system_ns, monotonic_ns=ts_mono_ns)
269-
out = SocketCANMedia._parse_native_frame(data)
270-
if out is not None:
314+
out = self._parse_native_frame(data)
315+
if isinstance(out, DataFrame):
271316
return timestamp, Envelope(out, loopback=loopback)
317+
elif isinstance(out, list):
318+
return _TimestampedErrorList(timestamp, out)
319+
else:
320+
assert False, "Unreachable"
272321

273322
def _compile_native_frame(self, source: DataFrame) -> bytes:
274323
flags = _CANFD_BRS if (self._is_fd and not self._disable_brs) else 0
@@ -278,13 +327,51 @@ def _compile_native_frame(self, source: DataFrame) -> bytes:
278327
assert len(out) == self._native_frame_size
279328
return out
280329

281-
@staticmethod
282-
def _parse_native_frame(source: bytes) -> typing.Optional[DataFrame]:
330+
def _parse_native_frame(self, source: bytes) -> None | DataFrame | typing.List[Media.Error]:
283331
header_size = _FRAME_HEADER_STRUCT.size
284332
ident_raw, data_length, _flags = _FRAME_HEADER_STRUCT.unpack(source[:header_size])
285-
if (ident_raw & _CAN_RTR_FLAG) or (ident_raw & _CAN_ERR_FLAG): # Unsupported format, ignore silently
333+
if ident_raw & _CAN_RTR_FLAG: # Unsupported format, ignore silently
286334
_logger.debug("Unsupported CAN frame dropped; raw SocketCAN ID is %08x", ident_raw)
287335
return None
336+
337+
if ident_raw & _CAN_ERR_FLAG:
338+
out_error = []
339+
if ident_raw & _CAN_ERR_TX_TIMEOUT:
340+
_logger.error("Error Tx Timeout on %s", self._iface_name)
341+
out_error.append(Media.Error.CAN_TX_TIMEOUT)
342+
if ident_raw & _CAN_ERR_CRTL: # Controller problem, details are in data[1]
343+
error_byte = source[header_size + 1]
344+
if error_byte & _CAN_ERR_CRTL_RX_OVERFLOW:
345+
_logger.error("Error Rx Overflow State on %s", self._iface_name)
346+
out_error.append(Media.Error.CAN_RX_OVERFLOW)
347+
if error_byte & _CAN_ERR_CRTL_TX_OVERFLOW:
348+
_logger.error("Error Tx Overflow State on %s", self._iface_name)
349+
out_error.append(Media.Error.CAN_TX_OVERFLOW)
350+
if error_byte & _CAN_ERR_CRTL_RX_WARNING:
351+
_logger.warning("Error Rx Warning State on %s", self._iface_name)
352+
out_error.append(Media.Error.CAN_RX_WARNING)
353+
if error_byte & _CAN_ERR_CRTL_TX_WARNING:
354+
_logger.warning("Error Tx Warning State on %s", self._iface_name)
355+
out_error.append(Media.Error.CAN_TX_WARNING)
356+
if error_byte & _CAN_ERR_CRTL_TX_PASSIVE:
357+
_logger.error("Error Tx Passive State on %s", self._iface_name)
358+
out_error.append(Media.Error.CAN_TX_PASSIVE)
359+
if error_byte & _CAN_ERR_CRTL_RX_PASSIVE:
360+
_logger.error("Error Rx Passive State on %s", self._iface_name)
361+
out_error.append(Media.Error.CAN_RX_PASSIVE)
362+
if ident_raw & _CAN_ERR_BUSOFF:
363+
_logger.error("CAN Bus Off on %s", self._iface_name)
364+
out_error.append(Media.Error.CAN_BUS_OFF)
365+
366+
if len(out_error) > 0:
367+
return out_error
368+
else:
369+
_logger.debug(
370+
"Unsupported CAN error frame dropped; raw SocketCAN ID is %08x",
371+
ident_raw,
372+
)
373+
return None
374+
288375
frame_format = FrameFormat.EXTENDED if ident_raw & _CAN_EFF_FLAG else FrameFormat.BASE
289376
data = source[header_size : header_size + data_length]
290377
assert len(data) == data_length
@@ -351,6 +438,48 @@ class _NativeFrameDataCapacity(enum.IntEnum):
351438
_CAN_RTR_FLAG = 0x40000000
352439
_CAN_ERR_FLAG = 0x20000000
353440

441+
# From the Linux kernel (linux/include/uapi/linux/can/error.h); not exposed via the Python's socket module
442+
_CAN_ERR_TX_TIMEOUT = 0x00000001
443+
"""TX timeout (by netdevice driver)"""
444+
_CAN_ERR_LOSTARB = 0x00000002
445+
"""lost arbitration / data[0]"""
446+
_CAN_ERR_CRTL = 0x00000004
447+
"""controller problems / data[1]"""
448+
_CAN_ERR_PROT = 0x00000008
449+
"""protocol violations / data[2..3]"""
450+
_CAN_ERR_TRX = 0x00000010
451+
"""transceiver status / data[4]"""
452+
_CAN_ERR_ACK = 0x00000020
453+
"""received no ACK on transmission"""
454+
_CAN_ERR_BUSOFF = 0x00000040
455+
"""bus off"""
456+
_CAN_ERR_BUSERROR = 0x00000080
457+
"""bus error (may flood!)"""
458+
_CAN_ERR_RESTARTED = 0x00000100
459+
"""controller restarted"""
460+
_CAN_ERR_CNT = 0x00000200
461+
"""TX error counter / data[6], RX error counter / data[7]"""
462+
463+
_CAN_ERR_CRTL_UNSPEC = 0x00
464+
""" unspecified"""
465+
_CAN_ERR_CRTL_RX_OVERFLOW = 0x01
466+
""" RX buffer overflow"""
467+
_CAN_ERR_CRTL_TX_OVERFLOW = 0x02
468+
""" TX buffer overflow"""
469+
_CAN_ERR_CRTL_RX_WARNING = 0x04
470+
""" reached warning level for RX errors"""
471+
_CAN_ERR_CRTL_TX_WARNING = 0x08
472+
""" reached warning level for TX errors"""
473+
_CAN_ERR_CRTL_RX_PASSIVE = 0x10
474+
""" reached error passive status RX"""
475+
_CAN_ERR_CRTL_TX_PASSIVE = 0x20
476+
""" reached error passive status TX (at least one error counter exceeds the protocol-defined level of 127)"""
477+
_CAN_ERR_CRTL_ACTIVE = 0x40
478+
""" recovered to error active state"""
479+
480+
# From the Linux kernel (linux/include/uapi/linux/can/raw.h); not exposed via the Python's socket module
481+
_CAN_RAW_ERR_FILTER = 2
482+
354483
_CAN_EFF_MASK = 0x1FFFFFFF
355484

356485
# approximate sk_buffer kernel struct overhead.

0 commit comments

Comments
 (0)