smithy-python/packages/smithy-core/tests/unit/test_retries.py at a46a6e088207475506b714b47c3475daf221b6c8 · ubaskota/smithy-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
#  Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#  SPDX-License-Identifier: Apache-2.0
import asyncio
import time
from unittest.mock import AsyncMock, patch

import pytest
from smithy_core.exceptions import CallError, RetryError
from smithy_core.retries import ExponentialBackoffJitterType as EBJT
from smithy_core.retries import (
    ExponentialRetryBackoffStrategy,
    RetryStrategyOptions,
    RetryStrategyResolver,
    SimpleRetryStrategy,
    StandardRetryQuota,
    StandardRetryStrategy,
    TokenBucket,
)


@pytest.mark.parametrize(
    "jitter_type, scale_value, max_backoff, expected_delays",
    [
        # no jitter
        (EBJT.NONE, 2, 20, [0, 2.0, 4.0, 8.0, 16.0, 20.0, 20.0]),
        (EBJT.NONE, 2.0, 20.0, [0, 2.0, 4.0, 8.0, 16.0, 20.0, 20.0]),
        (EBJT.NONE, 1.0, 20.0, [0, 1.0, 2.0, 4.0, 8.0, 16.0, 20.0]),
        (EBJT.NONE, 4.0, 2.0, [0, 2.0, 2.0, 2.0]),
        (EBJT.NONE, 23.4, 76.5, [0, 23.4, 46.8, 76.5, 76.5]),
        # full jitter
        (EBJT.FULL, 2.0, 20.0, [0, 1.0, 2.0, 4.0, 8.0, 10.0, 10.0]),
        (EBJT.FULL, 5.0, 20.0, [0, 2.5, 5.0, 10.0, 10.0]),
        (EBJT.FULL, 5.0, 10.0, [0, 2.5, 5.0, 5.0, 5.0]),
        (EBJT.FULL, 23.4, 76.5, [0, 11.7, 23.4, 38.25, 38.25]),
        # equal jitter
        (EBJT.DEFAULT, 2.0, 20.0, [0, 1.5, 3.0, 6.0, 12.0, 15.0, 15.0]),
        (EBJT.DEFAULT, 23.4, 76.5, [0, 17.55, 35.1, 57.375, 57.375]),
        # decorrelated jitter
        (EBJT.DECORRELATED, 2.0, 20.0, [0, 5.0, 9.5, 16.25, 20.0, 20.0]),
        (EBJT.DECORRELATED, 23.4, 76.5, [0, 58.5, 76.5, 76.5]),
        # edge cases with zeros
        (EBJT.NONE, 5.0, 0.0, [0, 0, 0, 0]),
        (EBJT.NONE, 0.0, 5.0, [0, 0, 0, 0]),
        (EBJT.NONE, 0.0, 0.0, [0, 0, 0, 0]),
        (EBJT.FULL, 5.0, 0.0, [0, 0, 0, 0]),
        (EBJT.FULL, 0.0, 5.0, [0, 0, 0, 0]),
        (EBJT.FULL, 0.0, 0.0, [0, 0, 0, 0]),
    ],
)
def test_exponential_backoff_strategy(
    jitter_type: EBJT,
    scale_value: float,
    max_backoff: float,
    expected_delays: list[float],
) -> None:
    """Check the backoff delay computed for each successive retry attempt.

    The random source is pinned to 0.5 so that every jitter type produces a
    deterministic delay. ``expected_delays[i]`` is the delay expected when
    ``retry_attempt`` is ``i``.
    """

    def fixed_random() -> float:
        # Every generated "random" value equals 0.5.
        return 0.5

    strategy = ExponentialRetryBackoffStrategy(
        backoff_scale_value=scale_value,
        max_backoff=max_backoff,
        jitter_type=jitter_type,
        random=fixed_random,
    )

    for attempt, expected in enumerate(expected_delays):
        actual = strategy.compute_next_backoff_delay(retry_attempt=attempt)
        assert actual == pytest.approx(expected)  # type: ignore


@pytest.mark.parametrize("max_attempts", [2, 3, 10])
def test_simple_retry_strategy(max_attempts: int) -> None:
    """Refreshing works ``max_attempts - 1`` times, then raises ``RetryError``."""
    backoff = ExponentialRetryBackoffStrategy(backoff_scale_value=5)
    strategy = SimpleRetryStrategy(
        backoff_strategy=backoff,
        max_attempts=max_attempts,
    )
    retryable_error = CallError(is_retry_safe=True)

    token = strategy.acquire_initial_retry_token()
    successful_refreshes = max_attempts - 1
    for _ in range(successful_refreshes):
        token = strategy.refresh_retry_token_for_retry(
            token_to_renew=token, error=retryable_error
        )

    # One refresh beyond the allowed attempts must be rejected.
    with pytest.raises(RetryError):
        strategy.refresh_retry_token_for_retry(
            token_to_renew=token, error=retryable_error
        )


def test_simple_retry_does_not_retry_unclassified() -> None:
    """A plain ``Exception`` is rejected with ``RetryError`` on the first refresh."""
    backoff = ExponentialRetryBackoffStrategy(backoff_scale_value=5)
    strategy = SimpleRetryStrategy(backoff_strategy=backoff, max_attempts=2)
    initial_token = strategy.acquire_initial_retry_token()
    unclassified_error = Exception()

    with pytest.raises(RetryError):
        strategy.refresh_retry_token_for_retry(
            token_to_renew=initial_token, error=unclassified_error
        )


def test_simple_retry_does_not_retry_when_safety_unknown() -> None:
    """A ``CallError`` with ``is_retry_safe=None`` is rejected with ``RetryError``."""
    backoff = ExponentialRetryBackoffStrategy(backoff_scale_value=5)
    strategy = SimpleRetryStrategy(backoff_strategy=backoff, max_attempts=2)
    initial_token = strategy.acquire_initial_retry_token()
    unknown_safety_error = CallError(is_retry_safe=None)

    with pytest.raises(RetryError):
        strategy.refresh_retry_token_for_retry(
            token_to_renew=initial_token, error=unknown_safety_error
        )


def test_simple_retry_does_not_retry_unsafe() -> None:
    """A client-fault ``CallError`` with ``is_retry_safe=False`` is not retried."""
    backoff = ExponentialRetryBackoffStrategy(backoff_scale_value=5)
    strategy = SimpleRetryStrategy(backoff_strategy=backoff, max_attempts=2)
    initial_token = strategy.acquire_initial_retry_token()
    unsafe_error = CallError(fault="client", is_retry_safe=False)

    with pytest.raises(RetryError):
        strategy.refresh_retry_token_for_retry(
            token_to_renew=initial_token, error=unsafe_error
        )


@pytest.mark.parametrize("max_attempts", [2, 3, 10])
def test_standard_retry_strategy(max_attempts: int) -> None:
    """Refreshing works ``max_attempts - 1`` times, then raises ``RetryError``."""
    strategy = StandardRetryStrategy(max_attempts=max_attempts)
    retryable_error = CallError(is_retry_safe=True)

    token = strategy.acquire_initial_retry_token()
    successful_refreshes = max_attempts - 1
    for _ in range(successful_refreshes):
        token = strategy.refresh_retry_token_for_retry(
            token_to_renew=token, error=retryable_error
        )

    # One refresh beyond the allowed attempts must be rejected.
    with pytest.raises(RetryError):
        strategy.refresh_retry_token_for_retry(
            token_to_renew=token, error=retryable_error
        )


@pytest.mark.parametrize(
    "error",
    [
        pytest.param(Exception(), id="unclassified_error"),
        pytest.param(CallError(is_retry_safe=None), id="safety_unknown_error"),
        pytest.param(
            CallError(fault="client", is_retry_safe=False), id="unsafe_error"
        ),
    ],
)
def test_standard_retry_does_not_retry(error: Exception | CallError) -> None:
    """Each parametrized error is rejected with ``RetryError`` on first refresh."""
    strategy = StandardRetryStrategy()
    initial_token = strategy.acquire_initial_retry_token()

    with pytest.raises(RetryError):
        strategy.refresh_retry_token_for_retry(
            token_to_renew=initial_token, error=error
        )


def test_standard_retry_after_overrides_backoff() -> None:
    """The error's ``retry_after`` value becomes the refreshed token's delay."""
    strategy = StandardRetryStrategy()
    initial_token = strategy.acquire_initial_retry_token()
    error_with_retry_after = CallError(is_retry_safe=True, retry_after=5.5)

    refreshed_token = strategy.refresh_retry_token_for_retry(
        token_to_renew=initial_token, error=error_with_retry_after
    )

    assert refreshed_token.retry_delay == 5.5


def test_standard_retry_invalid_max_attempts() -> None:
    """Constructing the strategy with ``max_attempts=-1`` raises ``ValueError``."""
    invalid_max_attempts = -1
    with pytest.raises(ValueError):
        StandardRetryStrategy(max_attempts=invalid_max_attempts)


@pytest.fixture
def retry_quota() -> StandardRetryQuota:
    """Provide a new ``StandardRetryQuota`` with an initial capacity of 10."""
    quota = StandardRetryQuota(initial_capacity=10)
    return quota


def test_retry_quota_initial_state(
    retry_quota: StandardRetryQuota,
) -> None:
    """An untouched quota reports its full initial capacity as available."""
    expected_capacity = 10
    assert retry_quota.available_capacity == expected_capacity


def test_retry_quota_acquire_success(
    retry_quota: StandardRetryQuota,
) -> None:
    """Acquiring lowers the available capacity by the amount returned."""
    cost = retry_quota.acquire(error=Exception())
    remaining = retry_quota.available_capacity
    assert remaining == 10 - cost


def test_retry_quota_acquire_when_exhausted(
    retry_quota: StandardRetryQuota,
) -> None:
    """A third acquire on the capacity-10 quota raises ``RetryError``."""
    # Drain capacity until insufficient for next acquire
    for _ in range(2):
        retry_quota.acquire(error=Exception())

    # Not enough capacity for another retry (need 5, only 0 left)
    with pytest.raises(RetryError, match="Retry quota exceeded"):
        retry_quota.acquire(error=Exception())


def test_retry_quota_release_restores_capacity(
    retry_quota: StandardRetryQuota,
) -> None:
    """Releasing exactly what was acquired brings capacity back to 10."""
    cost = retry_quota.acquire(error=Exception())
    retry_quota.release(release_amount=cost)

    restored_capacity = retry_quota.available_capacity
    assert restored_capacity == 10


def test_retry_quota_release_zero_adds_increment(
    retry_quota: StandardRetryQuota,
) -> None:
    """Releasing an amount of 0 still raises available capacity by one."""
    retry_quota.acquire(error=Exception())
    capacity_after_acquire = retry_quota.available_capacity
    assert capacity_after_acquire == 5

    retry_quota.release(release_amount=0)
    capacity_after_release = retry_quota.available_capacity
    assert capacity_after_release == 6


def test_retry_quota_release_caps_at_max(
    retry_quota: StandardRetryQuota,
) -> None:
    """Releasing more than was acquired never exceeds the initial capacity."""
    # Drain some capacity
    retry_quota.acquire(error=Exception())

    # Release more than we acquired. Should cap at initial capacity.
    oversized_release = 50
    retry_quota.release(release_amount=oversized_release)

    assert retry_quota.available_capacity == 10


def test_retry_quota_acquire_timeout_error(
    retry_quota: StandardRetryQuota,
) -> None:
    """A timeout error costs ``TIMEOUT_RETRY_COST`` and empties the quota of 10."""
    error = CallError(is_timeout_error=True, is_retry_safe=True)

    cost = retry_quota.acquire(error=error)

    assert cost == StandardRetryQuota.TIMEOUT_RETRY_COST
    assert retry_quota.available_capacity == 0


async def test_retry_strategy_resolver_none_returns_default() -> None:
    """Resolving ``None`` yields a ``StandardRetryStrategy`` with 3 max attempts."""
    default_strategy = await RetryStrategyResolver().resolve_retry_strategy(
        retry_strategy=None
    )

    assert isinstance(default_strategy, StandardRetryStrategy)
    assert default_strategy.max_attempts == 3


async def test_retry_strategy_resolver_creates_different_strategies() -> None:
    """Options with different ``max_attempts`` resolve to distinct strategies."""
    resolver = RetryStrategyResolver()
    three_attempt_options = RetryStrategyOptions(max_attempts=3)
    five_attempt_options = RetryStrategyOptions(max_attempts=5)

    three_attempt_strategy = await resolver.resolve_retry_strategy(
        retry_strategy=three_attempt_options
    )
    five_attempt_strategy = await resolver.resolve_retry_strategy(
        retry_strategy=five_attempt_options
    )

    assert three_attempt_strategy.max_attempts == 3
    assert five_attempt_strategy.max_attempts == 5
    assert three_attempt_strategy is not five_attempt_strategy


async def test_retry_strategy_resolver_caches_strategies() -> None:
    """Resolving the same input twice returns the identical strategy object."""
    resolver = RetryStrategyResolver()

    # Resolve the default (None) twice, then the same options object twice.
    default_first = await resolver.resolve_retry_strategy(retry_strategy=None)
    default_second = await resolver.resolve_retry_strategy(retry_strategy=None)
    five_attempt_options = RetryStrategyOptions(max_attempts=5)
    custom_first = await resolver.resolve_retry_strategy(
        retry_strategy=five_attempt_options
    )
    custom_second = await resolver.resolve_retry_strategy(
        retry_strategy=five_attempt_options
    )

    assert default_first is default_second
    assert custom_first is custom_second
    assert default_first is not custom_first


async def test_retry_strategy_resolver_returns_existing_strategy() -> None:
    """A strategy instance passed to the resolver is returned unchanged."""
    provided_strategy = SimpleRetryStrategy(max_attempts=7)
    resolver = RetryStrategyResolver()

    resolved = await resolver.resolve_retry_strategy(
        retry_strategy=provided_strategy
    )

    assert resolved is provided_strategy
    assert resolved.max_attempts == 7


async def test_retry_strategy_resolver_rejects_invalid_type() -> None:
    """Passing a string as ``retry_strategy`` raises a descriptive ``TypeError``."""
    resolver = RetryStrategyResolver()
    expected_message = (
        "retry_strategy must be RetryStrategy, RetryStrategyOptions, or None"
    )

    with pytest.raises(TypeError, match=expected_message):
        await resolver.resolve_retry_strategy(retry_strategy="invalid")  # type: ignore


class TestTokenBucket:
    """Tests for ``TokenBucket`` capacity accounting, refilling and waiting."""

    @pytest.mark.asyncio
    async def test_initial_state(self):
        """A default bucket starts at the class minimum capacity and fill rate."""
        token_bucket = TokenBucket()
        assert token_bucket.current_capacity == token_bucket.MIN_CAPACITY
        assert token_bucket.max_capacity == token_bucket.MIN_CAPACITY
        assert token_bucket.fill_rate == token_bucket.MIN_FILL_RATE

    @pytest.mark.asyncio
    async def test_acquire_succeeds_immediately_within_capacity(self):
        """Acquiring within the available capacity returns without waiting."""
        token_bucket = TokenBucket()
        start_time = time.monotonic()
        await token_bucket.acquire(1)
        elapsed = time.monotonic() - start_time

        # A 1 ms wall-clock bound is flaky on loaded CI machines. This looser
        # bound still separates the immediate path from a real wait, which
        # test_acquire_waits_when_capacity_insufficient shows is roughly two
        # seconds for a default bucket.
        assert elapsed < 0.1
        assert token_bucket.current_capacity == 0

    @pytest.mark.asyncio
    async def test_acquire_waits_when_capacity_insufficient(self):
        """An empty bucket sleeps once, for about 2 seconds, before acquiring."""
        token_bucket = TokenBucket(curr_capacity=0)

        with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep:

            async def side_effect(delay: float):
                # Simulate the refill that would have happened during the
                # sleep so the acquire can complete after a single wait.
                async with token_bucket._lock:  # type: ignore
                    token_bucket._curr_capacity = 1.0  # type: ignore

            mock_sleep.side_effect = side_effect
            await token_bucket.acquire(1)
            assert mock_sleep.call_count == 1

            # First positional argument of the sleep call is the wait time.
            actual_delay = mock_sleep.call_args[0][0]
            assert actual_delay == pytest.approx(2.0, abs=0.05)  # type: ignore

    @pytest.mark.asyncio
    async def test_multiple_refills_over_time(self):
        """Successive refills add ``elapsed * fill_rate`` tokens each time."""
        token_bucket = TokenBucket(curr_capacity=0, max_capacity=10, fill_rate=2.0)

        # Each patched ``time.monotonic`` call yields the next fake timestamp.
        time_values = iter([1.0, 1.5, 4.0])
        with patch("time.monotonic", side_effect=lambda: next(time_values)):
            token_bucket._last_timestamp = 0.0  # type: ignore

            # 1.0 s elapsed at 2 tokens/s.
            async with token_bucket._lock:  # type: ignore
                token_bucket._refill()  # type: ignore
            assert token_bucket.current_capacity == pytest.approx(2.0, abs=0.05)  # type: ignore

        with patch("time.monotonic", side_effect=lambda: next(time_values)):
            # Another 0.5 s elapsed.
            async with token_bucket._lock:  # type: ignore
                token_bucket._refill()  # type: ignore
            assert token_bucket.current_capacity == pytest.approx(3.0, abs=0.05)  # type: ignore

        with patch("time.monotonic", side_effect=lambda: next(time_values)):
            # Another 2.5 s elapsed.
            async with token_bucket._lock:  # type: ignore
                token_bucket._refill()  # type: ignore
            assert token_bucket.current_capacity == pytest.approx(8.0, abs=0.05)  # type: ignore

    @pytest.mark.asyncio
    async def test_update_bucket_updates_capacity(self):
        """``update_bucket`` sets fill rate and max capacity to the new rate."""
        token_bucket = TokenBucket()

        await token_bucket.update_bucket(5.0)
        assert token_bucket.fill_rate == 5.0
        assert token_bucket.max_capacity == 5.0
        assert token_bucket.current_capacity == 1.0

    @pytest.mark.asyncio
    async def test_rate_can_never_be_zero(self):
        """Updating the bucket with a rate of 0.0 leaves a non-zero fill rate."""
        token_bucket = TokenBucket()
        await token_bucket.update_bucket(0.0)

        assert token_bucket.fill_rate != 0.0

    @pytest.mark.asyncio
    async def test_refill_caps_at_max_capacity(self):
        """A refill right after ``update_bucket`` leaves capacity at about 1.0."""
        token_bucket = TokenBucket()
        # Max and current capacity of the bucket is set to 1.0 initially
        await token_bucket.update_bucket(10.0)

        async with token_bucket._lock:  # type: ignore
            token_bucket._refill()  # type: ignore

        # NOTE(review): test_update_bucket_updates_capacity shows max_capacity
        # follows the new rate, so it is presumably 10.0 here. If so, this
        # passes because almost no time elapsed, not because of the cap.
        # Confirm the intent.
        assert token_bucket.current_capacity == pytest.approx(1.0, abs=0.05)  # type: ignore

    @pytest.mark.asyncio
    async def test_many_concurrent_tasks_succeed(self):
        """100 concurrent acquires all complete within the timeout."""
        token_bucket = TokenBucket(curr_capacity=2.0)
        await token_bucket.update_bucket(4.0)
        completed_tasks: list[int] = []

        async def worker(worker_id: int):
            await token_bucket.acquire(0.1)
            completed_tasks.append(worker_id)

        try:
            # At the fill rate of 4/second and acquire cost of 0.1, it should take
            # around 2 seconds to process 100 tasks.
            await asyncio.wait_for(
                asyncio.gather(*[worker(i) for i in range(100)]), timeout=3
            )
        except TimeoutError:
            pytest.fail("Deadlock detected: concurrent acquire operations timed out")

        # Every worker finished exactly once and the bucket did not go negative.
        assert len(completed_tasks) == 100
        assert len(set(completed_tasks)) == 100
        assert token_bucket.current_capacity >= 0.0