Skip to content

Commit 75a9051

Browse files
fix: Fix local stack failing the hello world job
The fix adds an `EXECUTOR_ON_HOST` environment variable that tells the executor it is running directly on the local machine rather than inside a Docker container, so it must query nginx for streaming jobs via localhost. The variable is passed to the executor by the DevExecutorManager. Impacts: miner, executor
1 parent 0168aaf commit 75a9051

File tree

6 files changed

+94
-20
lines changed

6 files changed

+94
-20
lines changed

compute_horde_sdk/src/compute_horde_core/certificate.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ async def start_nginx(
7272
job_network: str,
7373
container_name: str = "job-nginx",
7474
timeout: int = 10,
75+
on_host: bool = False,
7576
) -> None:
7677
nginx_conf_file = dir_path / "nginx.conf"
7778
nginx_conf_file.write_text(nginx_conf)
@@ -86,11 +87,17 @@ async def start_nginx(
8687
"--network",
8788
"bridge", # primary external network
8889
"-p",
89-
f"{port}:443", # expose nginx port
90-
"-v",
91-
f"{dir_path}:/etc/nginx/",
92-
NGINX_IMAGE,
90+
f"{port}:443", # expose nginx port (HTTPS)
9391
]
92+
if on_host:
93+
cmd.extend(["-p", f"{port + 1}:80"]) # expose health check port when running on host
94+
cmd.extend(
95+
[
96+
"-v",
97+
f"{dir_path}:/etc/nginx/",
98+
NGINX_IMAGE,
99+
]
100+
)
94101
process = await asyncio.create_subprocess_exec(*cmd)
95102
_stdout, _stderr = await process.communicate()
96103
await process.wait()
@@ -103,7 +110,10 @@ async def start_nginx(
103110
await process.wait()
104111

105112
# wait for nginx to start
106-
url = f"http://{ip}/ok"
113+
if on_host:
114+
url = f"http://localhost:{port + 1}/ok"
115+
else:
116+
url = f"http://{ip}/ok"
107117
nginx_started = await check_endpoint(url, timeout)
108118
if not nginx_started:
109119
stdout = _stdout.decode() if _stdout else ""

executor/app/src/compute_horde_executor/executor/job_runner.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -538,16 +538,21 @@ async def after_start_job(self):
538538
job_network=self.job_network_name,
539539
container_name=self.nginx_container_name,
540540
timeout=WAIT_FOR_NGINX_TIMEOUT,
541+
on_host=settings.EXECUTOR_ON_HOST,
541542
)
542543
except Exception as e:
543544
raise HordeError(f"Failed to start Nginx: {truncate(str(e))}") from e
544545

545546
assert self.executor_certificate is not None
546547
# check that the job is ready to serve requests
547-
ip = await get_docker_container_ip(self.nginx_container_name, bridge_network=True)
548-
logger.debug(f"Checking if streaming job is ready at http://{ip}/health")
548+
if settings.EXECUTOR_ON_HOST:
549+
health_url = f"http://localhost:{settings.NGINX_PORT + 1}/health"
550+
else:
551+
ip = await get_docker_container_ip(self.nginx_container_name, bridge_network=True)
552+
health_url = f"http://{ip}/health"
553+
logger.debug(f"Checking if streaming job is ready at {health_url}")
549554
job_ready = await check_endpoint(
550-
f"http://{ip}/health",
555+
health_url,
551556
WAIT_FOR_STREAMING_JOB_TIMEOUT, # TODO: TIMEOUTS - Remove timeout?
552557
)
553558
if not job_ready:

executor/app/src/compute_horde_executor/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ def wrapped(*args, **kwargs):
291291

292292
EXECUTOR_TOKEN = env.str("EXECUTOR_TOKEN")
293293
DEBUG_NO_GPU_MODE = env.bool("DEBUG_NO_GPU_MODE", default=False)
294+
EXECUTOR_ON_HOST = env.bool("EXECUTOR_ON_HOST", default=False)
294295
VOLUME_MAX_SIZE_BYTES = env.int("VOLUME_MAX_SIZE_BYTES", default=2147483648) # 2GB
295296
OUTPUT_ZIP_UPLOAD_MAX_SIZE_BYTES = env.int(
296297
"OUTPUT_ZIP_UPLOAD_MAX_SIZE_BYTES", default=2147483648

local_stack/uv.lock

Lines changed: 55 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

miner/app/src/compute_horde_miner/miner/executor_manager/_internal/dev.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,19 @@ async def start_new_executor(self, token, executor_class, timeout):
2424
logger.debug(f"Starting new executor process: {token=}")
2525
nginx_port = executor_port_dispenser.get_port()
2626

27+
env = {
28+
"MINER_ADDRESS": f"ws://{settings.ADDRESS_FOR_EXECUTORS}:{settings.PORT_FOR_EXECUTORS}",
29+
"EXECUTOR_TOKEN": token,
30+
"PATH": os.environ["PATH"],
31+
"NGINX_PORT": str(nginx_port),
32+
"EXECUTOR_ON_HOST": "1",
33+
# Enable hf_transfer download acceleration package
34+
# https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhubenablehftransfer
35+
"HF_HUB_ENABLE_HF_TRANSFER": "1",
36+
}
37+
if docker_host := os.environ.get("DOCKER_HOST"):
38+
env["DOCKER_HOST"] = docker_host
39+
2740
return subprocess.Popen(
2841
[
2942
"uv",
@@ -33,15 +46,7 @@ async def start_new_executor(self, token, executor_class, timeout):
3346
"run_executor",
3447
*(await self.get_executor_cmdline_args()),
3548
],
36-
env={
37-
"MINER_ADDRESS": f"ws://{settings.ADDRESS_FOR_EXECUTORS}:{settings.PORT_FOR_EXECUTORS}",
38-
"EXECUTOR_TOKEN": token,
39-
"PATH": os.environ["PATH"],
40-
"NGINX_PORT": str(nginx_port),
41-
# Enable hf_transfer download acceleration package
42-
# https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhubenablehftransfer
43-
"HF_HUB_ENABLE_HF_TRANSFER": "1",
44-
},
49+
env=env,
4550
cwd=executor_dir,
4651
)
4752

miner/envs/dev/.env.template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ DATABASE_POOL_URL=
1313
DATABASE_URL=postgres://postgres:12345@localhost:8432/compute_horde_miner
1414

1515
EXECUTOR_MANAGER_CLASS_PATH=compute_horde_miner.miner.executor_manager.dev:DevExecutorManager
16-
EXECUTOR_IMAGE=backenddevelopersltd/compute-horde-executor:v0-latest
16+
EXECUTOR_IMAGE=backenddevelopersltd/compute-horde-executor:v1-latest
1717
ADDRESS_FOR_EXECUTORS=localhost
1818
PORT_FOR_EXECUTORS=8000
1919
BITTENSOR_MINER_ADDRESS=127.0.0.1

0 commit comments

Comments
 (0)