Skip to content

Commit 8339e25

Browse files
committed
pruning old nightly docker containers
1 parent 9927894 commit 8339e25

File tree

3 files changed

+15
-4
lines changed

3 files changed

+15
-4
lines changed

.github/workflows/nightly-training.yaml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
extra_args: --command.setup_terms.motion_command.params.motion_config.motion_file=s3://far-holosoma-assets/data/motions/g1_29dof/whole_body_tracking/motion_dance_v3.npz --algo.config.buffer_size=384
3838

3939
name: Train ${{ matrix.experiment }} on ${{ matrix.multigpu == 'True' && 'multi-gpu' || 'single-gpu' }} using ${{ matrix.simulator }}
40-
timeout-minutes: 720 # 12 hours
40+
timeout-minutes: 1000 # ~16.7 hours
4141

4242
steps:
4343
- name: Checkout code
@@ -48,13 +48,21 @@ jobs:
4848
env:
4949
WANDB_API_KEY: ${{ secrets.WANDB_PUB_API_KEY }}
5050
shell: bash
51-
timeout-minutes: 750 # 12.5 hours
51+
timeout-minutes: 1000 # ~16.7 hours
52+
5253
run: |
54+
# stop and remove every existing docker container (running or exited) before starting
55+
OLD_CONTAINERS=$(docker ps -a -q)
56+
if [[ -n "$OLD_CONTAINERS" ]]; then
57+
docker stop "$OLD_CONTAINERS" || true
58+
docker rm --force "$OLD_CONTAINERS" || true
59+
fi
5360
eval "$(aws configure export-credentials --format env)"
54-
docker run --gpus all --runtime=nvidia --shm-size=12g \
61+
docker run --rm --gpus all --runtime=nvidia --shm-size=12g \
5562
-v "$GITHUB_WORKSPACE:/workspace/holosoma" \
5663
--env AWS_ACCESS_KEY_ID --env AWS_SECRET_ACCESS_KEY \
5764
--env AWS_SESSION_TOKEN --env WANDB_API_KEY \
65+
--env GITHUB_RUN_ID \
5866
982423663241.dkr.ecr.us-west-2.amazonaws.com/holosoma:latest \
5967
bash -c """
6068
source scripts/source_${{ matrix.simulator }}_setup.sh

src/holosoma/holosoma/config_types/experiment.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ class TrainingConfig:
7878
name: str = "run"
7979
"""Run name for logging. `logger.name` takes precedence if set."""
8080

81+
tags: tuple[str, ...] = ()
82+
"""Optional tags to attach to the run for logging."""
83+
8184
# Evaluation settings
8285
max_eval_steps: int | None = None
8386
"""Maximum number of evaluation steps (None for unlimited)."""

tests/nightly/nightly.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def main():
100100
config.logger,
101101
project=f"nightly-{sanitized_exp}{multigpu_suffix}",
102102
name=f"nightly-{sanitized_exp}{multigpu_suffix}-{now_timestamp()}",
103-
tags=run_tags,
103+
tags=tuple(run_tags),
104104
),
105105
)
106106

0 commit comments

Comments
 (0)