Creating a Basic Environment#

This tutorial shows you how to create a simple robot learning environment using EmbodiChain’s Gym interface. You’ll learn how to inherit from the base environment class, set up robots and objects, define actions and observations, and run training scenarios.

The Code#

The tutorial corresponds to the random_reach.py script in the scripts/tutorials/gym directory.

Code for random_reach.py
# ----------------------------------------------------------------------------
# Copyright (c) 2021-2025 DexForce Technology Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------

import torch
import numpy as np
import gymnasium as gym

from embodichain.lab.gym.envs import BaseEnv, EnvCfg
from embodichain.lab.sim import SimulationManagerCfg
from embodichain.lab.sim.types import EnvAction, EnvObs
from embodichain.lab.sim.shapes import CubeCfg
from embodichain.lab.sim.objects import RigidObject, Robot
from embodichain.lab.sim.cfg import (
    RobotCfg,
    RigidObjectCfg,
    RigidBodyAttributesCfg,
)
from embodichain.lab.gym.utils.registration import register_env


@register_env("RandomReach-v1", max_episode_steps=100, override=True)
class RandomReachEnv(BaseEnv):

    robot_init_qpos = np.array(
        [1.57079, -1.57079, 1.57079, -1.57079, -1.57079, -3.14159]
    )

    def __init__(
        self,
        num_envs=1,
        headless=False,
        device="cpu",
        **kwargs,
    ):
        env_cfg = EnvCfg(
            sim_cfg=SimulationManagerCfg(
                headless=headless, arena_space=2.0, sim_device=device
            ),
            num_envs=num_envs,
        )

        super().__init__(
            cfg=env_cfg,
            **kwargs,
        )

    def _setup_robot(self, **kwargs) -> Robot:
        from embodichain.data import get_data_path

        file_path = get_data_path("UniversalRobots/UR10/UR10.urdf")

        robot: Robot = self.sim.add_robot(
            cfg=RobotCfg(
                uid="ur10",
                fpath=file_path,
                init_pos=(0, 0, 1),
                init_qpos=self.robot_init_qpos,
            )
        )

        qpos_limits = robot.body_data.qpos_limits[0].cpu().numpy()
        self.single_action_space = gym.spaces.Box(
            low=qpos_limits[:, 0], high=qpos_limits[:, 1], dtype=np.float32
        )

        return robot

    def _prepare_scene(self, **kwargs) -> None:
        size = 0.03
        # Create a kinematic cube object without collision.
        # Currently, we use this workaround for visualization purposes.
        self.cube: RigidObject = self.sim.add_rigid_object(
            cfg=RigidObjectCfg(
                uid="cube",
                shape=CubeCfg(size=[size, size, size]),
                attrs=RigidBodyAttributesCfg(enable_collision=False),
                init_pos=(0.0, 0.0, 0.5),
                body_type="kinematic",
            ),
        )

    def _update_sim_state(self, **kwargs) -> None:
        pose = torch.eye(4, device=self.device)
        pose = pose.unsqueeze_(0).repeat(self.num_envs, 1, 1)
        pose[:, :3, 3] += torch.rand(self.num_envs, 3, device=self.device) * 0.5 - 0.25
        self.cube.set_local_pose(pose=pose)

    def _step_action(self, action: EnvAction) -> EnvAction:
        self.robot.set_qpos(qpos=action)
        return action

    def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
        # You can also use `cube = self.sim.get_rigid_object("cube")` to access obj.
        # obs["cube_position"] = self.cube.get_local_pose()[:, :3]
        return obs


if __name__ == "__main__":
    import argparse
    import time

    parser = argparse.ArgumentParser(
        description="Demo for running a random reach environment."
    )
    parser.add_argument(
        "--num_envs", type=int, default=1, help="number of environments to run"
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cpu",
        help="device to run the environment on, e.g., 'cpu' or 'cuda'",
    )
    parser.add_argument("--headless", action="store_true", help="run in headless mode")
    args = parser.parse_args()

    env = gym.make(
        "RandomReach-v1",
        num_envs=args.num_envs,
        headless=args.headless,
        device=args.device,
    )

    for episode in range(10):
        print("Episode:", episode)
        env.reset()
        start_time = time.time()
        total_steps = 0

        for i in range(100):
            action = env.action_space.sample()
            action = torch.as_tensor(action, dtype=torch.float32, device=env.device)

            init_pose = env.robot_init_qpos
            init_pose = (
                torch.as_tensor(init_pose, dtype=torch.float32, device=env.device)
                .unsqueeze_(0)
                .repeat(env.num_envs, 1)
            )
            action = (
                init_pose
                + torch.rand_like(action, dtype=torch.float32, device=env.device) * 0.2
                - 0.1
            )

            obs, reward, done, truncated, info = env.step(action)
            total_steps += env.num_envs

        end_time = time.time()
        elapsed_time = end_time - start_time
        if elapsed_time > 0:
            fps = total_steps / elapsed_time
            print(f"Total steps: {total_steps}")
            print(f"Elapsed time: {elapsed_time:.2f} seconds")
            print(f"FPS: {fps:.2f}")
        else:
            print("Elapsed time is too short to calculate FPS.")

The Code Explained#

This tutorial demonstrates how to create a custom RL environment by inheriting from envs.BaseEnv. The environment implements a simple reach task where a robot arm tries to reach randomly positioned targets.

Environment Registration#

First, we register the environment with the Gymnasium registry using the utils.registration.register_env() decorator:

@register_env("RandomReach-v1", max_episode_steps=100, override=True)
class RandomReachEnv(BaseEnv):

The decorator parameters define:

  • Environment ID: "RandomReach-v1" - the unique identifier used to look up the environment (e.g., via gym.make())

  • max_episode_steps: Maximum steps per episode (100 here); episodes are truncated once this limit is reached

  • override: Whether to replace an existing registration with the same ID
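
The same decorator can register variants of an environment under different IDs. For example, a longer-horizon variant could be registered like this (a sketch; the subclass is hypothetical):

@register_env("RandomReachLong-v1", max_episode_steps=500, override=True)
class RandomReachLongEnv(RandomReachEnv):
    # Hypothetical variant: identical task, longer episode limit.
    pass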

Environment Initialization#

The __init__ method configures the simulation environment and calls the parent constructor:

    def __init__(
        self,
        num_envs=1,
        headless=False,
        device="cpu",
        **kwargs,
    ):
        env_cfg = EnvCfg(
            sim_cfg=SimulationManagerCfg(
                headless=headless, arena_space=2.0, sim_device=device
            ),
            num_envs=num_envs,
        )

        super().__init__(
            cfg=env_cfg,
            **kwargs,
        )
Key configuration options include:

  • num_envs: Number of parallel environments to run

  • headless: Whether to run without GUI (useful for training)

  • device: Computation device (“cpu” or “cuda”)
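
These keyword arguments are forwarded through gym.make(), so a headless, GPU-accelerated batch of environments can be created exactly as the script's __main__ block does:

env = gym.make(
    "RandomReach-v1",
    num_envs=8,
    headless=True,
    device="cuda",
)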

Robot Setup#

The _setup_robot method loads and configures the robot for the environment:

    def _setup_robot(self, **kwargs) -> Robot:
        from embodichain.data import get_data_path

        file_path = get_data_path("UniversalRobots/UR10/UR10.urdf")

        robot: Robot = self.sim.add_robot(
            cfg=RobotCfg(
                uid="ur10",
                fpath=file_path,
                init_pos=(0, 0, 1),
                init_qpos=self.robot_init_qpos,
            )
        )

        qpos_limits = robot.body_data.qpos_limits[0].cpu().numpy()
        self.single_action_space = gym.spaces.Box(
            low=qpos_limits[:, 0], high=qpos_limits[:, 1], dtype=np.float32
        )

        return robot

This method demonstrates:

  1. URDF Loading: Using data module to access robot URDF files

  2. Robot Configuration: Setting initial position and joint configuration

  3. Action Space Definition: Creating action space based on joint limits

The action space is automatically derived from the robot’s joint limits, ensuring actions stay within valid ranges.
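
As a quick sanity check, you can sample from the derived space and confirm the result respects the joint limits (a usage sketch relying on standard Gymnasium Box semantics):

sample = env.unwrapped.single_action_space.sample()        # shape: (num_dofs,)
assert env.unwrapped.single_action_space.contains(sample)  # within qpos limits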

Scene Preparation#

The _prepare_scene() method adds additional objects to the simulation environment:

    def _prepare_scene(self, **kwargs) -> None:
        size = 0.03
        # Create a kinematic cube object without collision.
        # Currently, we use this workaround for visualization purposes.
        self.cube: RigidObject = self.sim.add_rigid_object(
            cfg=RigidObjectCfg(
                uid="cube",
                shape=CubeCfg(size=[size, size, size]),
                attrs=RigidBodyAttributesCfg(enable_collision=False),
                init_pos=(0.0, 0.0, 0.5),
                body_type="kinematic",
            ),
        )
In this example, we add a kinematic cube that serves as a visual target. The cube is configured with:

  • No collision: enable_collision=False for visualization only

  • Kinematic body: Can be moved programmatically without physics

  • Custom size: Small 3cm cube for target visualization

  • Initial position: Placed at a fixed location, (0.0, 0.0, 0.5), before being randomized at each step
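
By contrast, an object that should participate in physics would enable collision and use a dynamic body type. A hedged sketch of such a variant (assuming the simulator accepts "dynamic" alongside "kinematic"; the uid and placement are illustrative):

        # A dynamic cube that collides with the robot and responds to physics.
        self.obstacle: RigidObject = self.sim.add_rigid_object(
            cfg=RigidObjectCfg(
                uid="obstacle",
                shape=CubeCfg(size=[0.05, 0.05, 0.05]),
                attrs=RigidBodyAttributesCfg(enable_collision=True),
                init_pos=(0.3, 0.0, 0.5),
                body_type="dynamic",
            ),
        )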

State Updates#

The _update_sim_state method is called at each simulation step to update object states:

    def _update_sim_state(self, **kwargs) -> None:
        pose = torch.eye(4, device=self.device)
        pose = pose.unsqueeze_(0).repeat(self.num_envs, 1, 1)
        pose[:, :3, 3] += torch.rand(self.num_envs, 3, device=self.device) * 0.5 - 0.25
        self.cube.set_local_pose(pose=pose)

This method randomizes the cube’s position. The pose is updated for all parallel environments simultaneously.
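
If targets should stay within a specific region of the workspace (for example, in front of the robot), you could sample within explicit bounds instead. A minimal sketch; the bounds are illustrative:

    def _update_sim_state(self, **kwargs) -> None:
        # Illustrative workspace bounds; tune them for your robot and task.
        low = torch.tensor([0.3, -0.2, 0.4], device=self.device)
        high = torch.tensor([0.7, 0.2, 0.8], device=self.device)
        pose = torch.eye(4, device=self.device).unsqueeze(0).repeat(self.num_envs, 1, 1)
        pose[:, :3, 3] = low + torch.rand(self.num_envs, 3, device=self.device) * (high - low)
        self.cube.set_local_pose(pose=pose)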

Note that this method is called after action execution and the simulation update, but before observation collection. For more details, see envs.BaseEnv.step().
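
Conceptually, the order of operations inside a single step looks like this (a simplified sketch of envs.BaseEnv.step(); the hook names are those overridden in this tutorial, while the surrounding helper names are assumed):

    def step(self, action):
        action = self._step_action(action)  # 1. apply the action to the robot
        self._simulate()                    # 2. advance the physics (assumed helper)
        self._update_sim_state()            # 3. update scene state, e.g., move the cube
        obs = self._get_obs()               # 4. collect observations, incl. _extend_obs()
        ...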

Action Execution#

The _step_action method applies actions to the robot:

    def _step_action(self, action: EnvAction) -> EnvAction:
        self.robot.set_qpos(qpos=action)
        return action

In this simple environment, actions directly set joint positions. More complex environments might:

  • Convert actions to joint torques or velocities

  • Apply action filtering or scaling

  • Implement inverse kinematics for end-effector control
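
For instance, a delta-position controller could be sketched as follows (hedged: it assumes a get_qpos() accessor alongside the set_qpos() shown above, and the 0.05 step scale is illustrative):

    def _step_action(self, action: EnvAction) -> EnvAction:
        # Interpret actions as small joint offsets rather than absolute targets.
        qpos = self.robot.get_qpos() + action * 0.05  # assumes a get_qpos() accessor
        limits = self.robot.body_data.qpos_limits[0]  # (num_dofs, 2), as in _setup_robot
        qpos = torch.clamp(qpos, min=limits[:, 0], max=limits[:, 1])
        self.robot.set_qpos(qpos=qpos)
        return action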

Observation Extension#

The default observations include the following keys:

  • robot: Robot proprioception data (joint positions, velocities, efforts)

  • sensor (optional): Data from any sensors (e.g., cameras)

The _extend_obs method allows you to add custom observations:

    def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
        # You can also use `cube = self.sim.get_rigid_object("cube")` to access obj.
        # obs["cube_position"] = self.cube.get_local_pose()[:, :3]
        return obs

Although these lines are commented out in the example, you can add custom data such as:

  • Object positions and orientations

  • Distance calculations

  • Custom sensor readings

  • Task-specific state information
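
For example, enabling the commented line exposes the target's position to the policy (the indexing follows the commented-out code above):

    def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
        # Expose the cube's position: the first three columns of its local pose.
        obs["cube_position"] = self.cube.get_local_pose()[:, :3]
        return obs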

The Code Execution#

To run the environment:

cd /path/to/embodichain
python scripts/tutorials/gym/random_reach.py

You can customize the execution with command-line options:

# Run multiple parallel environments
python scripts/tutorials/gym/random_reach.py --num_envs 4

# Run with GPU acceleration
python scripts/tutorials/gym/random_reach.py --device cuda

# Run in headless mode (no GUI)
python scripts/tutorials/gym/random_reach.py --headless

The script demonstrates:

  1. Environment Creation: Using gym.make() with custom parameters

  2. Episode Loop: Running multiple episodes with random actions

  3. Performance Monitoring: Calculating frames per second (FPS)

Key Features Demonstrated#

This tutorial showcases several important features of EmbodiChain environments:

  1. Gymnasium Integration: Full compatibility with the Gymnasium API

  2. Parallel Environments: Running multiple environments simultaneously for efficient training

  3. Robot Integration: Easy loading and control of robotic systems

  4. Custom Objects: Adding and manipulating scene objects

  5. Flexible Actions: Customizable action spaces and execution methods

  6. Extensible Observations: Adding task-specific observation data