Skip to content

Commit dfca37f

Browse files
feat: Use Pylon client for setting weights
The validator now sets weights using the Pylon client. A failed connection to the Pylon service now results in a set-weights error system event instead of a generic chain error. Impacts: validator. Issue: COM-800.
1 parent 8f8877c commit dfca37f

File tree

12 files changed

+145
-191
lines changed

12 files changed

+145
-191
lines changed

validator/app/src/compute_horde_validator/settings.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -698,9 +698,9 @@ def wrapped(*args, **kwargs):
698698

699699
BITTENSOR_NETUID = env.int("BITTENSOR_NETUID")
700700
BITTENSOR_NETWORK = env.str("BITTENSOR_NETWORK")
701-
PYLON_HOST = "pylon"
702-
PYLON_PORT = 8000
703-
PYLON_AUTH_TOKEN = "abc"
701+
PYLON_ADDRESS = env.str("PYLON_ADDRESS", "http://pylon:8000")
702+
PYLON_IDENTITY_NAME = env.str("PYLON_IDENTITY_NAME")
703+
PYLON_IDENTITY_TOKEN = env.str("PYLON_IDENTITY_TOKEN")
704704
# This can be explicitly set to None, which will cause some backfilling operations to never succeed. Useful when running
705705
# on a private staging net etc.
706706
BITTENSOR_ARCHIVE_NETWORK = env.str("BITTENSOR_ARCHIVE_NETWORK", "archive")
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from django.conf import settings
2+
from pylon._internal.client.sync.client import PylonClient
3+
from pylon._internal.client.sync.config import DEFAULT_RETRIES, Config
4+
from tenacity import Retrying
5+
6+
7+
def pylon_client(retries: Retrying = DEFAULT_RETRIES) -> PylonClient:
    """Create a ``PylonClient`` configured from Django settings.

    The service address and the identity credentials are read from
    ``settings.PYLON_ADDRESS``, ``settings.PYLON_IDENTITY_NAME`` and
    ``settings.PYLON_IDENTITY_TOKEN``.

    Args:
        retries: Retry policy passed to the client configuration as ``retry``.
            Defaults to the library-provided ``DEFAULT_RETRIES``.

    Returns:
        A new ``PylonClient`` built from that configuration.
    """
    client_config = Config(
        address=settings.PYLON_ADDRESS,
        identity_name=settings.PYLON_IDENTITY_NAME,
        identity_token=settings.PYLON_IDENTITY_TOKEN,
        retry=retries,
    )
    return PylonClient(client_config)

validator/app/src/compute_horde_validator/validator/scoring/pylon_client.py

Lines changed: 0 additions & 89 deletions
This file was deleted.

validator/app/src/compute_horde_validator/validator/scoring/tasks.py

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import contextlib
22
import logging
33
import random
4-
import time
54
import traceback
65
from functools import cached_property
76
from typing import Union
@@ -16,6 +15,8 @@
1615
from django.db import transaction
1716
from numpy._typing import NDArray
1817
from pydantic import JsonValue
18+
from pylon.v1 import DEFAULT_RETRIES, BasePylonException, Hotkey, Weight
19+
from tenacity import stop_after_attempt, wait_fixed
1920

2021
from compute_horde_validator.celery import app
2122
from compute_horde_validator.validator.allowance.utils.supertensor import supertensor
@@ -25,8 +26,8 @@
2526
from compute_horde_validator.validator.models.scoring.internal import (
2627
WeightSettingFinishedEvent,
2728
)
29+
from compute_horde_validator.validator.pylon import pylon_client
2830
from compute_horde_validator.validator.scoring import create_scoring_engine
29-
from compute_horde_validator.validator.scoring.pylon_client import PylonClientError, pylon_client
3031

3132
if False:
3233
import torch
@@ -139,38 +140,35 @@ def set_scores() -> None:
139140
max_weight_limit=hyperparameters["max_weights_limit"],
140141
)
141142
hk_by_uid = {n.uid: n.hotkey for n in neurons}
142-
hk_weight_mapping = {hk_by_uid[uid]: weight for uid, weight in zip(uids, weights)}
143-
144-
for try_number in range(WEIGHT_SETTING_ATTEMPTS):
145-
logger.debug(
146-
f"Setting weights (attempt #{try_number}):\nuids={uids}\nscores={weights}"
147-
)
148-
try:
149-
pylon_client().set_weights(hk_weight_mapping)
150-
logger.info("Successfully set weights!!!")
151-
break
152-
except PylonClientError:
153-
logger.warning(
154-
"Encountered when setting weights (attempt #{try_number}): ",
155-
exc_info=True,
156-
)
157-
save_weight_setting_failure(
158-
subtype=SystemEvent.EventSubType.WRITING_TO_CHAIN_GENERIC_ERROR,
143+
hk_weight_mapping = {
144+
Hotkey(hk_by_uid[uid]): Weight(weight) for uid, weight in zip(uids, weights)
145+
}
146+
logger.debug(f"Setting weights:\nuids={uids}\nscores={weights}")
147+
with pylon_client(
148+
retries=DEFAULT_RETRIES.copy(
149+
stop=stop_after_attempt(WEIGHT_SETTING_ATTEMPTS),
150+
wait=wait_fixed(WEIGHT_SETTING_FAILURE_BACKOFF),
151+
after=lambda retry_state: save_weight_setting_failure(
152+
subtype=SystemEvent.EventSubType.SET_WEIGHTS_ERROR,
159153
long_description=traceback.format_exc(),
160-
data={"try_number": try_number, "operation": "setting/committing"},
161-
)
162-
time.sleep(WEIGHT_SETTING_FAILURE_BACKOFF)
163-
else:
164-
raise MaximumNumberOfAttemptsExceeded()
154+
data={
155+
"try_number": retry_state.attempt_number,
156+
"operation": "setting/committing",
157+
},
158+
),
159+
)
160+
) as p_client:
161+
p_client.identity.put_weights(hk_weight_mapping)
162+
logger.info("Setting weights successfully scheduled in Pylon service!")
165163
save_weight_setting_event(
166164
type_=SystemEvent.EventType.WEIGHT_SETTING_SUCCESS,
167165
subtype=SystemEvent.EventSubType.SET_WEIGHTS_SUCCESS,
168166
long_description="",
169167
data={},
170168
)
171-
except MaximumNumberOfAttemptsExceeded:
172-
msg = f"Failed to set weights after {WEIGHT_SETTING_ATTEMPTS} attempts"
173-
logger.warning(msg)
169+
except BasePylonException as e:
170+
msg = f"Failed to schedule setting weights in Pylon service after {WEIGHT_SETTING_ATTEMPTS} attempts!"
171+
logger.warning(msg, exc_info=e)
174172
save_weight_setting_failure(
175173
subtype=SystemEvent.EventSubType.GIVING_UP,
176174
long_description=msg,

validator/app/src/compute_horde_validator/validator/tests/conftest.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import logging
22
import uuid
33
from collections.abc import Generator
4-
from unittest.mock import patch
4+
from unittest.mock import create_autospec, patch
55

66
import bittensor_wallet
77
import pytest
88
from compute_horde.executor_class import EXECUTOR_CLASS
99
from compute_horde_core.executor_class import ExecutorClass
10+
from pylon._internal.client.sync.client import PylonClient
1011

1112
from ..organic_jobs.miner_driver import execute_organic_job_request
1213
from .helpers import MockNeuron
@@ -94,3 +95,14 @@ def run_uuid():
9495
# raise ValueError(
9596
# "\n" + "\n".join(f"{task.get_name()}: {task.get_coro()}" for task in tasks)
9697
# )
98+
99+
100+
@pytest.fixture
def pylon_client_mock(mocker):
    """Replace ``PylonClient`` in the validator's pylon module with an autospec double.

    The double returns itself from ``__enter__`` and carries autospec'd
    ``open_access`` and ``identity`` API attributes, so tests can assert on
    calls such as ``identity.put_weights``.

    Returns:
        The autospec double that the patched ``PylonClient(...)`` call yields.
    """
    # This is a temporary solution until pylon client implements its own mocking utility.
    client_double = create_autospec(PylonClient)

    # Using the client in a ``with`` statement yields the very same double.
    client_double.__enter__.return_value = client_double

    open_access_api = create_autospec(PylonClient._open_access_api_cls, instance=True)
    identity_api = create_autospec(PylonClient._identity_api_cls, instance=True)
    client_double.open_access = open_access_api
    client_double.identity = identity_api

    # Any construction of PylonClient inside the patched module returns the double.
    mocker.patch(
        "compute_horde_validator.validator.pylon.PylonClient",
        return_value=client_double,
    )
    return client_double
Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
import pytest
22

3-
from compute_horde_validator.validator.scoring.pylon_client import setup_mock_pylon_client
43
from compute_horde_validator.validator.scoring.tasks import set_scores
54

65

76
@pytest.mark.django_db
87
@pytest.mark.override_config(SERVING=False)
9-
def test__migration__not_serving__should_not_set_scores():
10-
with setup_mock_pylon_client() as mock_pylon_client:
11-
set_scores()
12-
13-
assert mock_pylon_client.weights_submitted == []
8+
def test__migration__not_serving__should_not_set_scores(pylon_client_mock):
    """Running ``set_scores`` must not submit any weights through the Pylon identity API."""
    put_weights = pylon_client_mock.identity.put_weights

    set_scores()

    put_weights.assert_not_called()

0 commit comments

Comments
 (0)