Skip to content

Commit fe7ad74

Browse files
committed
Merge branch 'refactor/split-large-test-files' into dev
2 parents b3e2f48 + f7bdaed commit fe7ad74

11 files changed

+1118
-1140
lines changed

tests/accounting/test_account_model_limits.py

Lines changed: 0 additions & 370 deletions
This file was deleted.
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
from datetime import datetime, timezone, timedelta
2+
3+
import pytest
4+
from freezegun import freeze_time
5+
6+
from llm_accounting import LLMAccounting
7+
from llm_accounting.backends.sqlite import SQLiteBackend
8+
from llm_accounting.models.limits import (LimitScope, LimitType, TimeInterval,
9+
UsageLimitDTO)
10+
11+
12+
@pytest.fixture
def sqlite_backend_for_accounting(tmp_path):
    """Yield an initialized SQLiteBackend whose database file lives under ``tmp_path``.

    The backend is closed once the consuming test has finished.
    """
    database_file = tmp_path / "test_accounting_day.sqlite"
    sqlite_backend = SQLiteBackend(db_path=str(database_file))
    sqlite_backend.initialize()
    yield sqlite_backend
    sqlite_backend.close()
19+
20+
21+
@pytest.fixture
def accounting_instance(sqlite_backend_for_accounting):
    """Yield an LLMAccounting object wired to the per-test SQLite backend fixture."""
    yield LLMAccounting(backend=sqlite_backend_for_accounting)
25+
26+
27+
def test_account_model_requests_per_day(accounting_instance: LLMAccounting, sqlite_backend_for_accounting: SQLiteBackend):
    """Verify a per-user, per-model cap of 2 requests per day.

    A USER-scoped limit (2 requests/day) for one username/model pair is
    registered next to a looser GLOBAL limit (100 requests/day). Two requests
    are accepted and tracked, the third is rejected with a message naming the
    USER limit, and a request by another user or for another model is still
    accepted.
    """
    user = "test_user_cd"
    model = "model_y"
    caller = "caller_rpd"
    per_day = TimeInterval.DAY.value
    requests_kind = LimitType.REQUESTS.value

    loose_global_cap = UsageLimitDTO(
        scope=LimitScope.GLOBAL.value,
        limit_type=requests_kind,
        max_value=100,
        interval_unit=per_day,
        interval_value=1,
    )
    tight_user_cap = UsageLimitDTO(
        scope=LimitScope.USER.value,
        username=user,
        model=model,
        limit_type=requests_kind,
        max_value=2,
        interval_unit=per_day,
        interval_value=1,
    )
    for limit in (tight_user_cap, loose_global_cap):
        sqlite_backend_for_accounting.insert_usage_limit(limit)
    accounting_instance.quota_service.refresh_limits_cache()

    def ask(as_user=user, on_model=model):
        # Every quota probe in this test uses the same caller and token counts.
        return accounting_instance.check_quota(
            model=on_model, username=as_user, caller_name=caller, input_tokens=10, completion_tokens=10
        )

    with freeze_time("2023-01-01 00:00:00", tz_offset=0) as clock:
        # First request at +10h, second one an hour later; both fit under the cap.
        for attempt, step in enumerate((timedelta(hours=10), timedelta(hours=1)), start=1):
            clock.tick(delta=step)
            ok, why = ask()
            assert ok, f"Request {attempt}/2 for {model} by {user} should be allowed. Reason: {why}"
            accounting_instance.track_usage(
                model=model,
                username=user,
                caller_name=caller,
                prompt_tokens=10,
                completion_tokens=10,
                cost=0.01,
                timestamp=datetime.now(timezone.utc),
            )

        clock.tick(delta=timedelta(hours=1))
        ok, denial = ask()
        assert not ok, f"3rd request for {model} by {user} should be denied"
        assert denial is not None, "Denial message should not be None"
        assert f"USER (user: {user})" in denial
        assert "limit: 2.00 requests per 1 day" in denial
        assert "exceeded. Current usage: 2.00, request: 1.00." in denial

        other_user_ok, _ = ask(as_user="other_user_rpd")
        assert other_user_ok, "Request for same model by other_user_rpd should be allowed"

        other_model_ok, _ = ask(on_model="other_model_rpd")
        assert other_model_ok, f"Request for other_model_rpd by {user} should be allowed"
90+
91+
92+
def test_account_model_completion_tokens_per_day(accounting_instance: LLMAccounting, sqlite_backend_for_accounting: SQLiteBackend):
    """Verify a per-user, per-model cap of 200 output tokens per day.

    After a 150-token request is accepted and tracked, a 51-token request is
    rejected with a message naming the USER limit, while a request by another
    user or for another model is still accepted.
    """
    user, model, caller = "test_user_gh", "model_a", "caller_ctpd"
    per_day = TimeInterval.DAY.value
    output_tokens = LimitType.OUTPUT_TOKENS.value

    loose_global_cap = UsageLimitDTO(
        scope=LimitScope.GLOBAL.value,
        limit_type=output_tokens,
        max_value=5000,
        interval_unit=per_day,
        interval_value=1,
    )
    tight_user_cap = UsageLimitDTO(
        scope=LimitScope.USER.value,
        username=user,
        model=model,
        limit_type=output_tokens,
        max_value=200,
        interval_unit=per_day,
        interval_value=1,
    )
    sqlite_backend_for_accounting.insert_usage_limit(tight_user_cap)
    sqlite_backend_for_accounting.insert_usage_limit(loose_global_cap)
    accounting_instance.quota_service.refresh_limits_cache()

    def probe(completion_tokens, *, as_user=user, on_model=model):
        # Only the completion-token count, user and model vary between probes.
        return accounting_instance.check_quota(
            model=on_model,
            username=as_user,
            caller_name=caller,
            input_tokens=10,
            completion_tokens=completion_tokens,
        )

    with freeze_time("2023-01-01 00:00:00", tz_offset=0) as clock:
        clock.tick(delta=timedelta(seconds=0))
        ok, why = probe(150)
        assert ok, f"Request 1 (150 tokens) for {model} by {user} should be allowed. Reason: {why}"
        accounting_instance.track_usage(
            model=model,
            username=user,
            caller_name=caller,
            prompt_tokens=10,
            completion_tokens=150,
            cost=0.01,
            timestamp=datetime.now(timezone.utc),
        )

        clock.tick(delta=timedelta(seconds=1))
        ok, denial = probe(51)
        assert not ok, f"Request 2 (51 tokens) for {model} by {user} should be denied"
        assert denial is not None, "Denial message should not be None"
        assert f"USER (user: {user})" in denial
        assert f"limit: 200.00 {output_tokens} per 1 day" in denial
        assert "exceeded. Current usage: 150.00, request: 51.00." in denial

        other_user_ok, _ = probe(10, as_user="other_user_ctpd")
        assert other_user_ok, "Request for same model by other_user_ctpd should be allowed"

        other_model_ok, _ = probe(10, on_model="other_model_ctpd")
        assert other_model_ok, f"Request for other_model_ctpd by {user} should be allowed"
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
from datetime import datetime, timezone, timedelta
2+
3+
import pytest
4+
from freezegun import freeze_time
5+
6+
from llm_accounting import LLMAccounting
7+
from llm_accounting.backends.sqlite import SQLiteBackend
8+
from llm_accounting.models.limits import (LimitScope, LimitType, TimeInterval,
9+
UsageLimitDTO)
10+
11+
12+
@pytest.fixture
def sqlite_backend_for_accounting(tmp_path):
    """Yield an initialized SQLiteBackend whose database file lives under ``tmp_path``.

    The backend is closed once the consuming test has finished.
    """
    database_file = tmp_path / "test_accounting_minute.sqlite"
    sqlite_backend = SQLiteBackend(db_path=str(database_file))
    sqlite_backend.initialize()
    yield sqlite_backend
    sqlite_backend.close()
19+
20+
21+
@pytest.fixture
def accounting_instance(sqlite_backend_for_accounting):
    """Yield an LLMAccounting object wired to the per-test SQLite backend fixture."""
    yield LLMAccounting(backend=sqlite_backend_for_accounting)
25+
26+
27+
def test_account_model_requests_per_minute(accounting_instance: LLMAccounting, sqlite_backend_for_accounting: SQLiteBackend):
    """Verify a per-user, per-model cap of 3 requests per minute.

    Three requests spaced one second apart are accepted and tracked; the
    fourth is rejected with a message naming the USER limit, while a request
    by another user or for another model is still accepted.
    """
    user = "test_user_ab"
    model = "model_x"
    caller = "caller_rpm"
    per_minute = TimeInterval.MINUTE.value
    requests_kind = LimitType.REQUESTS.value
    one_second = timedelta(seconds=1)

    loose_global_cap = UsageLimitDTO(
        scope=LimitScope.GLOBAL.value,
        limit_type=requests_kind,
        max_value=100,
        interval_unit=per_minute,
        interval_value=1,
    )
    tight_user_cap = UsageLimitDTO(
        scope=LimitScope.USER.value,
        username=user,
        model=model,
        limit_type=requests_kind,
        max_value=3,
        interval_unit=per_minute,
        interval_value=1,
    )
    for limit in (tight_user_cap, loose_global_cap):
        sqlite_backend_for_accounting.insert_usage_limit(limit)
    accounting_instance.quota_service.refresh_limits_cache()

    def ask(as_user=user, on_model=model):
        # Every quota probe in this test uses the same caller and token counts.
        return accounting_instance.check_quota(
            model=on_model, username=as_user, caller_name=caller, input_tokens=10, completion_tokens=10
        )

    with freeze_time("2023-01-01 00:00:00", tz_offset=0) as clock:
        for attempt in range(1, 4):
            clock.tick(delta=one_second)
            ok, why = ask()
            assert ok, f"Request {attempt}/3 for {model} by {user} should be allowed. Reason: {why}"
            accounting_instance.track_usage(
                model=model,
                username=user,
                caller_name=caller,
                prompt_tokens=10,
                completion_tokens=10,
                cost=0.01,
                timestamp=datetime.now(timezone.utc),
            )

        clock.tick(delta=one_second)
        ok, denial = ask()
        assert not ok, f"4th request for {model} by {user} should be denied"
        assert denial is not None, "Denial message should not be None"
        assert f"USER (user: {user})" in denial
        assert "limit: 3.00 requests per 1 minute" in denial
        assert "exceeded. Current usage: 3.00, request: 1.00." in denial

        other_user_ok, _ = ask(as_user="other_user_rpm")
        assert other_user_ok, "Request for same model by other_user_rpm should be allowed"

        other_model_ok, _ = ask(on_model="other_model_rpm")
        assert other_model_ok, f"Request for other_model_rpm by {user} should be allowed"
81+
82+
83+
def test_account_model_completion_tokens_per_minute(accounting_instance: LLMAccounting, sqlite_backend_for_accounting: SQLiteBackend):
    """Verify a per-user, per-model cap of 1000 output tokens per minute.

    Two 500-token requests are accepted and tracked, after which even a
    1-token request is rejected with a message naming the USER limit. A
    request by another user or for another model is still accepted.
    """
    user, model, caller = "test_user_ef", "model_z", "caller_ctpm"
    per_minute = TimeInterval.MINUTE.value
    output_tokens = LimitType.OUTPUT_TOKENS.value

    loose_global_cap = UsageLimitDTO(
        scope=LimitScope.GLOBAL.value,
        limit_type=output_tokens,
        max_value=5000,
        interval_unit=per_minute,
        interval_value=1,
    )
    tight_user_cap = UsageLimitDTO(
        scope=LimitScope.USER.value,
        username=user,
        model=model,
        limit_type=output_tokens,
        max_value=1000,
        interval_unit=per_minute,
        interval_value=1,
    )
    sqlite_backend_for_accounting.insert_usage_limit(tight_user_cap)
    sqlite_backend_for_accounting.insert_usage_limit(loose_global_cap)
    accounting_instance.quota_service.refresh_limits_cache()

    def probe(completion_tokens, *, as_user=user, on_model=model):
        # Only the completion-token count, user and model vary between probes.
        return accounting_instance.check_quota(
            model=on_model,
            username=as_user,
            caller_name=caller,
            input_tokens=10,
            completion_tokens=completion_tokens,
        )

    with freeze_time("2023-01-01 00:00:00", tz_offset=0) as clock:
        # Request 1 happens at the frozen instant, request 2 one second later.
        for attempt, step in enumerate((timedelta(seconds=0), timedelta(seconds=1)), start=1):
            clock.tick(delta=step)
            ok, why = probe(500)
            assert ok, f"Request {attempt} (500 tokens) for {model} by {user} should be allowed. Reason: {why}"
            accounting_instance.track_usage(
                model=model,
                username=user,
                caller_name=caller,
                prompt_tokens=10,
                completion_tokens=500,
                cost=0.01,
                timestamp=datetime.now(timezone.utc),
            )

        clock.tick(delta=timedelta(seconds=1))
        ok, denial = probe(1)
        assert not ok, f"Request 3 (1 token) for {model} by {user} should be denied"
        assert denial is not None, "Denial message should not be None"
        assert f"USER (user: {user})" in denial
        assert f"limit: 1000.00 {output_tokens} per 1 minute" in denial
        assert "exceeded. Current usage: 1000.00, request: 1.00." in denial

        other_user_ok, _ = probe(10, as_user="other_user_ctpm")
        assert other_user_ok, "Request for same model by other_user_ctpm should be allowed"

        other_model_ok, _ = probe(10, on_model="other_model_ctpm")
        assert other_model_ok, f"Request for other_model_ctpm by {user} should be allowed"
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
from datetime import datetime, timezone, timedelta
2+
3+
import pytest
4+
from freezegun import freeze_time
5+
6+
from llm_accounting import LLMAccounting
7+
from llm_accounting.backends.sqlite import SQLiteBackend
8+
from llm_accounting.models.limits import (LimitScope, LimitType, TimeInterval,
9+
UsageLimitDTO)
10+
11+
12+
@pytest.fixture
def sqlite_backend_for_accounting(tmp_path):
    """Yield an initialized SQLiteBackend whose database file lives under ``tmp_path``.

    The backend is closed once the consuming test has finished.
    """
    database_file = tmp_path / "test_accounting_total.sqlite"
    sqlite_backend = SQLiteBackend(db_path=str(database_file))
    sqlite_backend.initialize()
    yield sqlite_backend
    sqlite_backend.close()
19+
20+
21+
@pytest.fixture
def accounting_instance(sqlite_backend_for_accounting):
    """Yield an LLMAccounting object wired to the per-test SQLite backend fixture."""
    yield LLMAccounting(backend=sqlite_backend_for_accounting)
25+
26+
27+
def test_account_total_requests_per_minute(accounting_instance: LLMAccounting, sqlite_backend_for_accounting: SQLiteBackend):
    """Verify a user-wide request cap counts requests across models.

    The user has an account-wide limit of 4 requests per minute (no model set)
    plus a higher, model-specific limit of 10 for ``specific_model_q``. After
    two tracked requests each on ``model_a`` and ``model_b``, a request on a
    third model and a request on ``specific_model_q`` are both rejected with
    the 4-request USER limit message; another user is still accepted.
    """
    user = "test_user_account_wide"
    caller = "caller_account_total"
    per_minute = TimeInterval.MINUTE.value
    requests_kind = LimitType.REQUESTS.value
    one_second = timedelta(seconds=1)

    # User-wide cap: model and caller_name are explicitly None.
    user_wide_cap = UsageLimitDTO(
        scope=LimitScope.USER.value,
        username=user,
        model=None,
        caller_name=None,
        limit_type=requests_kind,
        max_value=4,
        interval_unit=per_minute,
        interval_value=1,
    )
    # Model-specific cap for the same user, deliberately higher than the user-wide one.
    # NOTE(review): the original author remarks that MODEL scope might also fit
    # here when both username and model are set -- not verified.
    user_model_cap = UsageLimitDTO(
        scope=LimitScope.USER.value,
        username=user,
        model="specific_model_q",
        limit_type=requests_kind,
        max_value=10,
        interval_unit=per_minute,
        interval_value=1,
    )
    loose_global_cap = UsageLimitDTO(
        scope=LimitScope.GLOBAL.value,
        limit_type=requests_kind,
        max_value=100,
        interval_unit=per_minute,
        interval_value=1,
    )
    for limit in (user_wide_cap, user_model_cap, loose_global_cap):
        sqlite_backend_for_accounting.insert_usage_limit(limit)
    accounting_instance.quota_service.refresh_limits_cache()

    def ask(on_model, as_user=user):
        # Every quota probe in this test uses the same caller and token counts.
        return accounting_instance.check_quota(
            model=on_model, username=as_user, caller_name=caller, input_tokens=10, completion_tokens=10
        )

    with freeze_time("2023-01-01 00:00:00", tz_offset=0) as clock:
        # Two tracked requests on model_a, then two on model_b: 4 in total for the user.
        for model in ("model_a", "model_b"):
            for attempt in (1, 2):
                clock.tick(delta=one_second)
                ok, why = ask(model)
                assert ok, f"Request {attempt}/2 for {model} by {user} should be allowed. Reason: {why}"
                accounting_instance.track_usage(
                    model=model,
                    username=user,
                    caller_name=caller,
                    prompt_tokens=10,
                    completion_tokens=10,
                    cost=0.01,
                    timestamp=datetime.now(timezone.utc),
                )

        clock.tick(delta=one_second)

        ok, denial = ask("model_c")
        assert not ok, f"5th request for model_c by {user} should be denied by account-wide limit"
        assert denial is not None, "Denial message should not be None for 5th request"
        assert f"USER (user: {user}) limit: 4.00 requests per 1 minute" in denial
        assert "exceeded. Current usage: 4.00, request: 1.00." in denial

        specific_ok, specific_denial = ask("specific_model_q")
        assert not specific_ok, \
            f"Request for specific_model_q by {user} should be denied by account-wide limit (already at 4 requests)"
        assert specific_denial is not None, "Denial message should not be None for specific_model_q"
        assert f"USER (user: {user}) limit: 4.00 requests per 1 minute" in specific_denial
        assert "exceeded. Current usage: 4.00, request: 1.00." in specific_denial

        other_user_ok, _ = ask("model_a", as_user="other_user_account")
        assert other_user_ok, "Request for model_a by other_user_account should be allowed"

0 commit comments

Comments
 (0)