Skip to content

Commit 1d2e825

Browse files
authored
Fix decode_array double-reading length bytes for arrays with 24+ items (#479)
The custom decode_array override in serialization.py called _decode_length to check for indefinite-length arrays, then delegated to the original decode_array which called _decode_length again. For arrays with fewer than 24 items, the length is encoded directly in the subtype (no stream bytes consumed), so the double call was harmless. For 24+ items, CBOR uses multi-byte length encoding (e.g. 98 18 for 24 items) and _decode_length reads from the stream — the second call consumed actual array content as a length byte, corrupting the decode. Replace the _decode_length call with a simple subtype == 31 check, which is sufficient to detect indefinite-length arrays without consuming any bytes from the stream. This bug only affected cbor2pure, not the cbor2 C extension.
1 parent 8946e19 commit 1d2e825

File tree

2 files changed

+35
-3
lines changed

2 files changed

+35
-3
lines changed

pycardano/serialization.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,9 @@ def wrapper(cls, value: Primitive):
196196

197197
def decode_array(self, subtype: int) -> Sequence[Any]:
198198
# Major tag 4
199-
length = self._decode_length(subtype, allow_indefinite=True)
200-
201-
if length is None:
199+
if subtype == 31:
200+
# Indefinite-length array — delegate to the original decoder, then wrap
201+
# the result in IndefiniteFrozenList to preserve indefinite encoding.
202202
ret = IndefiniteFrozenList(list(self.decode_array(subtype=subtype)))
203203
ret.freeze()
204204
return ret

test/pycardano/test_serialization.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,38 @@ class MyTest(ArrayCBORSerializable):
11341134
assert isinstance(MyTest.from_cbor(a.to_cbor()).a, IndefiniteList)
11351135

11361136

1137+
def test_decode_array_with_24_or_more_items():
1138+
"""Test that definite-length arrays with 24+ items decode correctly.
1139+
1140+
Regression test for a bug where the custom decode_array override called
1141+
_decode_length (consuming stream bytes), then delegated to the original
1142+
decode_array which called _decode_length again. For arrays with < 24 items
1143+
the length is encoded in the subtype itself (no extra bytes), so the double
1144+
call was harmless. For 24+ items, CBOR uses multi-byte length encoding
1145+
(e.g. 98 18 for 24 items) and the second _decode_length call consumed
1146+
actual array content, corrupting the stream.
1147+
"""
1148+
1149+
@dataclass
1150+
class LargeDatum(PlutusData):
1151+
CONSTR_ID = 1
1152+
data: List[bytes]
1153+
1154+
hello = b"Hello world!"
1155+
1156+
# Exactly 24 items — the threshold where the length no longer fits in the initial byte and CBOR emits a separate length byte (2-byte header, e.g. 98 18)
1157+
datum24 = LargeDatum(data=[hello] * 24)
1158+
restored24 = LargeDatum.from_cbor(datum24.to_cbor())
1159+
assert len(restored24.data) == 24
1160+
assert all(x == hello for x in restored24.data)
1161+
1162+
# 25 items — above the threshold
1163+
datum25 = LargeDatum(data=[hello] * 25)
1164+
restored25 = LargeDatum.from_cbor(datum25.to_cbor())
1165+
assert len(restored25.data) == 25
1166+
assert all(x == hello for x in restored25.data)
1167+
1168+
11371169
def test_liqwid_tx():
11381170
with open("test/resources/cbors/liqwid.json") as f:
11391171
cbor_hex = json.load(f).get("cborHex")

0 commit comments

Comments
 (0)