Skip to content

Commit 90ed2b1

Browse files
committed
Merge commit 'a9a9b96ce78019dd5dafa379d8f097df7e761d3e' as 'modules/atom01_train/rsl_rl'
2 parents 45c6da8 + a9a9b96 commit 90ed2b1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+7798
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Copyright (c) 2021-2026, ETH Zurich and NVIDIA CORPORATION
2+
All rights reserved.
3+
4+
SPDX-License-Identifier: BSD-3-Clause
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# IDEs
2+
.idea
3+
4+
# builds
5+
*.egg-info
6+
build/*
7+
dist/*
8+
9+
# cache
10+
__pycache__
11+
.pytest_cache
12+
13+
# vs code
14+
.vscode
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
repos:
2+
- repo: https://github.com/astral-sh/ruff-pre-commit
3+
rev: v0.14.0
4+
hooks:
5+
- id: ruff-check
6+
- id: ruff-format
7+
- repo: https://github.com/pre-commit/pre-commit-hooks
8+
rev: v4.5.0
9+
hooks:
10+
- id: check-symlinks
11+
- id: destroyed-symlinks
12+
- id: check-yaml
13+
- id: check-toml
14+
- id: check-merge-conflict
15+
- id: check-case-conflict
16+
- id: check-executables-have-shebangs
17+
- id: check-shebang-scripts-are-executable
18+
- id: detect-private-key
19+
- repo: https://github.com/codespell-project/codespell
20+
rev: v2.2.6
21+
hooks:
22+
- id: codespell
23+
additional_dependencies:
24+
- tomli
25+
- repo: https://github.com/Lucas-C/pre-commit-hooks
26+
rev: v1.5.1
27+
hooks:
28+
- id: insert-license
29+
files: \.py$
30+
args:
31+
# - --remove-header # Remove existing license headers. Useful when updating license.
32+
- --license-filepath
33+
- .github/LICENSE_HEADER.txt
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
cff-version: 1.2.0
2+
title: "RSL-RL: A Learning Library for Robotics Research"
3+
message: "If you use this work, please cite the following paper."
4+
repository-code: "https://github.com/leggedrobotics/rsl_rl"
5+
license: BSD-3-Clause
6+
version: 3.3.0
7+
type: software
8+
authors:
9+
- family-names: Schwarke
10+
given-names: Clemens
11+
- family-names: Mittal
12+
given-names: Mayank
13+
- family-names: Rudin
14+
given-names: Nikita
15+
- family-names: Hoeller
16+
given-names: David
17+
keywords:
18+
- reinforcement learning
19+
- robotics
20+
- control
21+
- RSL-RL
22+
preferred-citation:
23+
type: article
24+
authors:
25+
- family-names: Schwarke
26+
given-names: Clemens
27+
- family-names: Mittal
28+
given-names: Mayank
29+
- family-names: Rudin
30+
given-names: Nikita
31+
- family-names: Hoeller
32+
given-names: David
33+
- family-names: Hutter
34+
given-names: Marco
35+
title: "RSL-RL: A Learning Library for Robotics Research"
36+
journal: "arXiv preprint"
37+
doi: 10.48550/arXiv.2509.10771
38+
url: "https://arxiv.org/abs/2509.10771"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# RSL-RL Maintainers and Contributors
2+
3+
This is the official list of developers and contributors.
4+
5+
To see the full list of contributors, see the revision history in the source control.
6+
7+
Names should be added to this file as: individual names or organizations.
8+
9+
Email addresses are tracked elsewhere to avoid spam.
10+
11+
Please keep the lists sorted alphabetically.
12+
13+
## Maintainers
14+
15+
* Robotic Systems Lab, ETH Zurich
16+
* NVIDIA Corporation
17+
18+
---
19+
20+
* Clemens Schwarke
21+
* Mayank Mittal
22+
23+
## Authors
24+
25+
* Clemens Schwarke
26+
* David Hoeller
27+
* Mayank Mittal
28+
* Nikita Rudin
29+
30+
## Contributors
31+
32+
* Bikram Pandit
33+
* Eric Vollenweider
34+
* Fabian Jenelten
35+
* Lorenzo Terenzi
36+
* Marko Bjelonic
37+
* Matthijs van der Boon
38+
* Özhan Özen
39+
* Pascal Roth
40+
* Shaoshu Su
41+
* Zhang Chong
42+
* Ziqi Fan
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
Copyright (c) 2026, ETH Zurich
2+
Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without modification,
6+
are permitted provided that the following conditions are met:
7+
8+
1. Redistributions of source code must retain the above copyright notice,
9+
this list of conditions and the following disclaimer.
10+
11+
2. Redistributions in binary form must reproduce the above copyright notice,
12+
this list of conditions and the following disclaimer in the documentation
13+
and/or other materials provided with the distribution.
14+
15+
3. Neither the name of the copyright holder nor the names of its contributors
16+
may be used to endorse or promote products derived from this software without
17+
specific prior written permission.
18+
19+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
30+
See licenses/dependencies for license information of dependencies of this package.
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# RSL-RL
2+
3+
A fast and simple implementation of learning algorithms for robotics. For an overview of the library please have a look at https://arxiv.org/pdf/2509.10771.
4+
5+
Environment repositories using the framework:
6+
7+
* **`Isaac Lab`** (built on top of NVIDIA Isaac Sim): https://github.com/isaac-sim/IsaacLab
8+
* **`Legged Gym`** (built on top of NVIDIA Isaac Gym): https://leggedrobotics.github.io/legged_gym/
9+
* **`MuJoCo Playground`** (built on top of MuJoCo MJX and Warp): https://github.com/google-deepmind/mujoco_playground/
10+
* **`mjlab`** (built on top of MuJoCo Warp): https://github.com/mujocolab/mjlab
11+
12+
The library currently supports **PPO** and **Student-Teacher Distillation** with additional features from our research. These include:
13+
14+
* [Random Network Distillation (RND)](https://proceedings.mlr.press/v229/schwarke23a.html) - Encourages exploration by adding
15+
a curiosity-driven intrinsic reward.
16+
* [Symmetry-based Augmentation](https://arxiv.org/abs/2403.04359) - Makes the learned behaviors more symmetrical.
17+
18+
We welcome contributions from the community. Please check our contribution guidelines for more
19+
information.
20+
21+
**Maintainers**: Mayank Mittal and Clemens Schwarke <br/>
22+
**Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA <br/>
23+
**Contact**: cschwarke@ethz.ch
24+
25+
26+
## Setup
27+
28+
The package can be installed via PyPI with:
29+
30+
```bash
31+
pip install rsl-rl-lib
32+
```
33+
34+
or by cloning this repository and installing it with:
35+
36+
```bash
37+
git clone https://github.com/leggedrobotics/rsl_rl
38+
cd rsl_rl
39+
pip install -e .
40+
```
41+
42+
The package supports the following logging frameworks which can be configured through `logger`:
43+
44+
* Tensorboard: https://www.tensorflow.org/tensorboard/
45+
* Weights & Biases: https://wandb.ai/site
46+
* Neptune: https://docs.neptune.ai/
47+
48+
For a demo configuration of PPO, please check the [example_config.yaml](config/example_config.yaml) file.
49+
50+
51+
## Contribution Guidelines
52+
53+
For documentation, we adopt the [Google Style Guide](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for docstrings. Please make sure that your code is well-documented and follows the guidelines.
54+
55+
We use the following tools for maintaining code quality:
56+
57+
- [pre-commit](https://pre-commit.com/): Runs a list of formatters and linters over the codebase.
58+
- [ruff](https://github.com/astral-sh/ruff): An extremely fast Python linter and code formatter, written in Rust.
59+
60+
Please check [here](https://pre-commit.com/#install) for instructions to set these up. To run over the entire repository, please execute the following command in the terminal:
61+
62+
```bash
63+
# for installation (only once)
64+
pre-commit install
65+
# for running
66+
pre-commit run --all-files
67+
```
68+
69+
## Citing
70+
71+
If you use this library for your research, please cite the following work:
72+
73+
```text
74+
@article{schwarke2025rslrl,
75+
title={RSL-RL: A Learning Library for Robotics Research},
76+
author={Schwarke, Clemens and Mittal, Mayank and Rudin, Nikita and Hoeller, David and Hutter, Marco},
77+
journal={arXiv preprint arXiv:2509.10771},
78+
year={2025}
79+
}
80+
```
81+
82+
If you use the library with curiosity-driven exploration (random network distillation), please cite:
83+
84+
```text
85+
@InProceedings{schwarke2023curiosity,
86+
title = {Curiosity-Driven Learning of Joint Locomotion and Manipulation Tasks},
87+
author = {Schwarke, Clemens and Klemm, Victor and Boon, Matthijs van der and Bjelonic, Marko and Hutter, Marco},
88+
booktitle = {Proceedings of The 7th Conference on Robot Learning},
89+
pages = {2594--2610},
90+
year = {2023},
91+
volume = {229},
92+
series = {Proceedings of Machine Learning Research},
93+
publisher = {PMLR},
94+
url = {https://proceedings.mlr.press/v229/schwarke23a.html},
95+
}
96+
```
97+
98+
If you use the library with symmetry augmentation, please cite:
99+
100+
```text
101+
@InProceedings{mittal2024symmetry,
102+
author={Mittal, Mayank and Rudin, Nikita and Klemm, Victor and Allshire, Arthur and Hutter, Marco},
103+
booktitle={2024 IEEE International Conference on Robotics and Automation (ICRA)},
104+
title={Symmetry Considerations for Learning Task Symmetric Robot Policies},
105+
year={2024},
106+
pages={7433-7439},
107+
doi={10.1109/ICRA57147.2024.10611493}
108+
}
109+
```
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
runner:
2+
class_name: OnPolicyRunner
3+
# General
4+
num_steps_per_env: 24 # Number of steps per environment per iteration
5+
max_iterations: 1500 # Number of policy updates
6+
seed: 1
7+
# Observations
8+
obs_groups: {"policy": ["policy"], "critic": ["policy", "privileged"]} # Maps observation groups to sets. See `vec_env.py` for more information
9+
# Logging parameters
10+
save_interval: 50 # Check for potential saves every `save_interval` iterations
11+
experiment_name: walking_experiment
12+
run_name: ""
13+
# Logging writer
14+
logger: tensorboard # tensorboard, neptune, wandb
15+
neptune_project: legged_gym
16+
wandb_project: legged_gym
17+
18+
# Policy
19+
policy:
20+
class_name: ActorCritic
21+
activation: elu
22+
actor_obs_normalization: false
23+
critic_obs_normalization: false
24+
actor_hidden_dims: [256, 256, 256]
25+
critic_hidden_dims: [256, 256, 256]
26+
init_noise_std: 1.0
27+
noise_std_type: "scalar" # 'scalar' or 'log'
28+
state_dependent_std: false
29+
30+
# Algorithm
31+
algorithm:
32+
class_name: PPO
33+
# Training
34+
learning_rate: 0.001
35+
num_learning_epochs: 5
36+
num_mini_batches: 4 # mini-batch size = num_envs * num_steps / num_mini_batches
37+
schedule: adaptive # adaptive, fixed
38+
# Value function
39+
value_loss_coef: 1.0
40+
clip_param: 0.2
41+
use_clipped_value_loss: true
42+
# Surrogate loss
43+
desired_kl: 0.01
44+
entropy_coef: 0.01
45+
gamma: 0.99
46+
lam: 0.95
47+
max_grad_norm: 1.0
48+
# Miscellaneous
49+
normalize_advantage_per_mini_batch: false
50+
51+
# Random network distillation
52+
rnd_cfg:
53+
weight: 0.0 # Initial weight of the RND reward
54+
weight_schedule: null # This is a dictionary with a required key called "mode". Please check the RND module for more information
55+
reward_normalization: false # Whether to normalize RND reward
56+
# Learning parameters
57+
learning_rate: 0.001 # Learning rate for RND
58+
# Network parameters
59+
num_outputs: 1 # Number of outputs of RND network. Note: if -1, then the network will use dimensions of the observation
60+
predictor_hidden_dims: [-1] # Hidden dimensions of predictor network
61+
target_hidden_dims: [-1] # Hidden dimensions of target network
62+
63+
# Symmetry augmentation
64+
symmetry_cfg:
65+
use_data_augmentation: true # This adds symmetric trajectories to the batch
66+
use_mirror_loss: false # This adds symmetry loss term to the loss function
67+
data_augmentation_func: null # String containing the module and function name to import
68+
# Example: "legged_gym.envs.locomotion.anymal_c.symmetry:get_symmetric_states"
69+
#
70+
# .. code-block:: python
71+
#
72+
# @torch.no_grad()
73+
# def get_symmetric_states(
74+
# obs: Optional[torch.Tensor] = None, actions: Optional[torch.Tensor] = None, cfg: "BaseEnvCfg" = None, obs_type: str = "policy"
75+
# ) -> Tuple[torch.Tensor, torch.Tensor]:
76+
#
77+
mirror_loss_coeff: 0.0 # Coefficient for symmetry loss term. If 0, no symmetry loss is used

0 commit comments

Comments
 (0)