Creating a Basic Environment#

This tutorial shows you how to create a simple robot learning environment using EmbodiChain’s Gym interface. You’ll learn how to inherit from the base environment class, set up robots and objects, define actions and observations, and run training scenarios.

The Code#

The tutorial corresponds to the random_reach.py script in the scripts/tutorials/gym directory.

Code for random_reach.py
  1# ----------------------------------------------------------------------------
  2# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
  3#
  4# Licensed under the Apache License, Version 2.0 (the "License");
  5# you may not use this file except in compliance with the License.
  6# You may obtain a copy of the License at
  7#
  8#     http://www.apache.org/licenses/LICENSE-2.0
  9#
 10# Unless required by applicable law or agreed to in writing, software
 11# distributed under the License is distributed on an "AS IS" BASIS,
 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13# See the License for the specific language governing permissions and
 14# limitations under the License.
 15# ----------------------------------------------------------------------------
 16
 17import torch
 18import numpy as np
 19import gymnasium as gym
 20
 21from embodichain.lab.gym.envs import BaseEnv, EnvCfg
 22from embodichain.lab.sim import SimulationManagerCfg
 23from embodichain.lab.sim.types import EnvAction, EnvObs
 24from embodichain.lab.sim.shapes import CubeCfg
 25from embodichain.lab.sim.objects import RigidObject, Robot
 26from embodichain.lab.sim.cfg import (
 27    RenderCfg,
 28    RobotCfg,
 29    RigidObjectCfg,
 30    RigidBodyAttributesCfg,
 31)
 32from embodichain.lab.gym.utils.registration import register_env
 33
 34
 35@register_env("RandomReach-v1", override=True)
 36class RandomReachEnv(BaseEnv):
 37
 38    robot_init_qpos = np.array(
 39        [1.57079, -1.57079, 1.57079, -1.57079, -1.57079, -3.14159]
 40    )
 41
 42    def __init__(
 43        self,
 44        num_envs=1,
 45        headless=False,
 46        device="cpu",
 47        renderer="hybrid",
 48        **kwargs,
 49    ):
 50        env_cfg = EnvCfg(
 51            sim_cfg=SimulationManagerCfg(
 52                headless=headless,
 53                arena_space=2.0,
 54                sim_device=device,
 55                render_cfg=RenderCfg(renderer=renderer),
 56            ),
 57            num_envs=num_envs,
 58        )
 59
 60        super().__init__(
 61            cfg=env_cfg,
 62            **kwargs,
 63        )
 64
 65    def _setup_robot(self, **kwargs) -> Robot:
 66        from embodichain.data import get_data_path
 67
 68        file_path = get_data_path("UniversalRobots/UR10/UR10.urdf")
 69
 70        robot: Robot = self.sim.add_robot(
 71            cfg=RobotCfg(
 72                uid="ur10",
 73                fpath=file_path,
 74                init_pos=(0, 0, 1),
 75                init_qpos=self.robot_init_qpos,
 76            )
 77        )
 78
 79        qpos_limits = robot.body_data.qpos_limits[0].cpu().numpy()
 80        self.single_action_space = gym.spaces.Box(
 81            low=qpos_limits[:, 0], high=qpos_limits[:, 1], dtype=np.float32
 82        )
 83
 84        return robot
 85
 86    def _prepare_scene(self, **kwargs) -> None:
 87        size = 0.03
 88        # Create a kinematic cube object without collision.
 89        # Currently, we use this workaround for visualization purposes.
 90        self.cube: RigidObject = self.sim.add_rigid_object(
 91            cfg=RigidObjectCfg(
 92                uid="cube",
 93                shape=CubeCfg(size=[size, size, size]),
 94                attrs=RigidBodyAttributesCfg(enable_collision=False),
 95                init_pos=(0.0, 0.0, 0.5),
 96                body_type="kinematic",
 97            ),
 98        )
 99
100    def _update_sim_state(self, **kwargs) -> None:
101        pose = torch.eye(4, device=self.device)
102        pose = pose.unsqueeze_(0).repeat(self.num_envs, 1, 1)
103        pose[:, :3, 3] += torch.rand(self.num_envs, 3, device=self.device) * 0.5 - 0.25
104        self.cube.set_local_pose(pose=pose)
105
106    def _step_action(self, action: EnvAction) -> EnvAction:
107        self.robot.set_qpos(qpos=action)
108        return action
109
110    def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
111        # You can also use `cube = self.sim.get_rigid_object("cube")` to access obj.
112        # obs["cube_position"] = self.cube.get_local_pose()[:, :3]
113        return obs
114
115
116if __name__ == "__main__":
117    import argparse
118    import time
119
120    from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
121
122    parser = argparse.ArgumentParser(
123        description="Demo for running a random reach environment."
124    )
125    add_env_launcher_args_to_parser(parser)
126    args = parser.parse_args()
127
128    env = gym.make(
129        "RandomReach-v1",
130        num_envs=args.num_envs,
131        headless=args.headless,
132        device=args.device,
133        renderer=args.renderer,
134    )
135
136    for episode in range(10):
137        print("Episode:", episode)
138        env.reset()
139        start_time = time.time()
140        total_steps = 0
141
142        for i in range(100):
143            action = env.action_space.sample()
144            action = torch.as_tensor(
145                action, dtype=torch.float32, device=env.get_wrapper_attr("device")
146            )
147
148            init_pose = env.unwrapped.robot_init_qpos
149            init_pose = (
150                torch.as_tensor(
151                    init_pose,
152                    dtype=torch.float32,
153                    device=env.get_wrapper_attr("device"),
154                )
155                .unsqueeze_(0)
156                .repeat(env.get_wrapper_attr("num_envs"), 1)
157            )
158            action = (
159                init_pose
160                + torch.rand_like(
161                    action, dtype=torch.float32, device=env.get_wrapper_attr("device")
162                )
163                * 0.2
164                - 0.1
165            )
166
167            obs, reward, done, truncated, info = env.step(action)
168            total_steps += env.get_wrapper_attr("num_envs")
169
170        end_time = time.time()
171        elapsed_time = end_time - start_time
172        if elapsed_time > 0:
173            fps = total_steps / elapsed_time
174            print(f"Total steps: {total_steps}")
175            print(f"Elapsed time: {elapsed_time:.2f} seconds")
176            print(f"FPS: {fps:.2f}")
177        else:
178            print("Elapsed time is too short to calculate FPS.")

The Code Explained#

This tutorial demonstrates how to create a custom RL environment by inheriting from envs.BaseEnv. The environment implements a simple reach task where a robot arm tries to reach randomly positioned targets.

Environment Registration#

First, we register the environment with the Gymnasium registry using the utils.registration.register_env() decorator:

@register_env("RandomReach-v1", override=True)
class RandomReachEnv(BaseEnv):

The decorator parameters define:

  • Environment ID: "RandomReach-v1" - unique identifier for the environment

  • override: Whether to override existing environment with same ID

Environment Initialization#

The __init__ method configures the simulation environment and calls the parent constructor:

from embodichain.lab.sim.objects import RigidObject, Robot
from embodichain.lab.sim.cfg import (
    RenderCfg,
    RobotCfg,
    RigidObjectCfg,
    RigidBodyAttributesCfg,
)
from embodichain.lab.gym.utils.registration import register_env


@register_env("RandomReach-v1", override=True)
class RandomReachEnv(BaseEnv):

    robot_init_qpos = np.array(
        [1.57079, -1.57079, 1.57079, -1.57079, -1.57079, -3.14159]
    )

    def __init__(
        self,
        num_envs=1,
        headless=False,
        device="cpu",

Key configuration options include:

  • num_envs: Number of parallel environments to run

  • headless: Whether to run without GUI (useful for training)

  • device: Computation device (“cpu” or “cuda”)

Robot Setup#

The _setup_robot method loads and configures the robot for the environment:

    def _setup_robot(self, **kwargs) -> Robot:
        from embodichain.data import get_data_path

        file_path = get_data_path("UniversalRobots/UR10/UR10.urdf")

        robot: Robot = self.sim.add_robot(
            cfg=RobotCfg(
                uid="ur10",
                fpath=file_path,
                init_pos=(0, 0, 1),
                init_qpos=self.robot_init_qpos,
            )
        )

        qpos_limits = robot.body_data.qpos_limits[0].cpu().numpy()
        self.single_action_space = gym.spaces.Box(
            low=qpos_limits[:, 0], high=qpos_limits[:, 1], dtype=np.float32
        )

        return robot

This method demonstrates:

  1. URDF Loading: Using data module to access robot URDF files

  2. Robot Configuration: Setting initial position and joint configuration

  3. Action Space Definition: Creating action space based on joint limits

The action space is automatically derived from the robot’s joint limits, ensuring actions stay within valid ranges.

Scene Preparation#

The _prepare_scene() method adds additional objects to the simulation environment:

                uid="ur10",
                fpath=file_path,
                init_pos=(0, 0, 1),
                init_qpos=self.robot_init_qpos,
            )
        )

        qpos_limits = robot.body_data.qpos_limits[0].cpu().numpy()
        self.single_action_space = gym.spaces.Box(
            low=qpos_limits[:, 0], high=qpos_limits[:, 1], dtype=np.float32
        )

        return robot

In this example, we add a kinematic cube that serves as a visual target. The cube is configured with:

  • No collision: enable_collision=False for visualization only

  • Kinematic body: Can be moved programmatically without physics

  • Custom size: Small 3cm cube for target visualization

  • initial position: Initially placed at a fixed location

State Updates#

The _update_sim_state method is called at each simulation step to update object states:

    def _update_sim_state(self, **kwargs) -> None:
        pose = torch.eye(4, device=self.device)
        pose = pose.unsqueeze_(0).repeat(self.num_envs, 1, 1)
        pose[:, :3, 3] += torch.rand(self.num_envs, 3, device=self.device) * 0.5 - 0.25
        self.cube.set_local_pose(pose=pose)

This method randomizes the cube’s position. The pose is updated for all parallel environments simultaneously.

Note that this method is called after perform action execution and simulation update but before observation collection. For more details, see envs.BaseEnv.step().

Action Execution#

The _step_action method applies actions to the robot:

    def _step_action(self, action: EnvAction) -> EnvAction:
        self.robot.set_qpos(qpos=action)
        return action

In this simple environment, actions directly set joint positions. More complex environments might:

  • Convert actions to joint torques or velocities

  • Apply action filtering or scaling

  • Implement inverse kinematics for end-effector control

Observation Extension#

The default observations include the following keys:

  • robot: Robot proprioception data (joint positions, velocities, efforts)

  • sensor (optional): Data from any sensors (e.g., cameras)

The _extend_obs method allows you to add custom observations:

    def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
        # You can also use `cube = self.sim.get_rigid_object("cube")` to access obj.
        # obs["cube_position"] = self.cube.get_local_pose()[:, :3]
        return obs

While commented out in this example, you can add custom data like:

  • Object positions and orientations

  • Distance calculations

  • Custom sensor readings

  • Task-specific state information

The Code Execution#

To run the environment:

cd /path/to/embodichain
python scripts/tutorials/gym/random_reach.py

You can customize the execution with command-line options:

# Run multiple parallel environments
python scripts/tutorials/gym/random_reach.py --num_envs 4

# Run with GPU acceleration
python scripts/tutorials/gym/random_reach.py --device cuda

# Run in headless mode (no GUI)
python scripts/tutorials/gym/random_reach.py --headless

The script demonstrates:

  1. Environment Creation: Using gym.make() with custom parameters

  2. Episode Loop: Running multiple episodes with random actions

  3. Performance Monitoring: Calculating frames per second (FPS)

Key Features Demonstrated#

This tutorial showcases several important features of EmbodiChain environments:

  1. Gymnasium Integration: Full compatibility with the Gymnasium API

  2. Parallel Environments: Running multiple environments simultaneously for efficient training

  3. Robot Integration: Easy loading and control of robotic systems

  4. Custom Objects: Adding and manipulating scene objects

  5. Flexible Actions: Customizable action spaces and execution methods

  6. Extensible Observations: Adding task-specific observation data

Tip

Using an AI coding agent? Once you’re ready to create your own task environment, use the /add-task-env skill to scaffold the file with the correct structure, @register_env decorator, base class methods, and test stub. Use /add-test to write tests and /pre-commit-check to verify everything passes CI before committing.

Next Steps#