Creating a Basic Environment#
This tutorial shows you how to create a simple robot learning environment using EmbodiChain’s Gym interface. You’ll learn how to inherit from the base environment class, set up robots and objects, define actions and observations, and run training scenarios.
The Code#
The tutorial corresponds to the random_reach.py script in the scripts/tutorials/gym directory.
Code for random_reach.py
# ----------------------------------------------------------------------------
# Copyright (c) 2021-2025 DexForce Technology Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------

import torch
import numpy as np
import gymnasium as gym

from embodichain.lab.gym.envs import BaseEnv, EnvCfg
from embodichain.lab.sim import SimulationManagerCfg
from embodichain.lab.sim.types import EnvAction, EnvObs
from embodichain.lab.sim.shapes import CubeCfg
from embodichain.lab.sim.objects import RigidObject, Robot
from embodichain.lab.sim.cfg import (
    RobotCfg,
    RigidObjectCfg,
    RigidBodyAttributesCfg,
)
from embodichain.lab.gym.utils.registration import register_env


@register_env("RandomReach-v1", max_episode_steps=100, override=True)
class RandomReachEnv(BaseEnv):

    robot_init_qpos = np.array(
        [1.57079, -1.57079, 1.57079, -1.57079, -1.57079, -3.14159]
    )

    def __init__(
        self,
        num_envs=1,
        headless=False,
        device="cpu",
        **kwargs,
    ):
        env_cfg = EnvCfg(
            sim_cfg=SimulationManagerCfg(
                headless=headless, arena_space=2.0, sim_device=device
            ),
            num_envs=num_envs,
        )

        super().__init__(
            cfg=env_cfg,
            **kwargs,
        )

    def _setup_robot(self, **kwargs) -> Robot:
        from embodichain.data import get_data_path

        file_path = get_data_path("UniversalRobots/UR10/UR10.urdf")

        robot: Robot = self.sim.add_robot(
            cfg=RobotCfg(
                uid="ur10",
                fpath=file_path,
                init_pos=(0, 0, 1),
                init_qpos=self.robot_init_qpos,
            )
        )

        qpos_limits = robot.body_data.qpos_limits[0].cpu().numpy()
        self.single_action_space = gym.spaces.Box(
            low=qpos_limits[:, 0], high=qpos_limits[:, 1], dtype=np.float32
        )

        return robot

    def _prepare_scene(self, **kwargs) -> None:
        size = 0.03
        # Create a kinematic cube object without collision.
        # Currently, we use this workaround for visualization purposes.
        self.cube: RigidObject = self.sim.add_rigid_object(
            cfg=RigidObjectCfg(
                uid="cube",
                shape=CubeCfg(size=[size, size, size]),
                attrs=RigidBodyAttributesCfg(enable_collision=False),
                init_pos=(0.0, 0.0, 0.5),
                body_type="kinematic",
            ),
        )

    def _update_sim_state(self, **kwargs) -> None:
        pose = torch.eye(4, device=self.device)
        pose = pose.unsqueeze_(0).repeat(self.num_envs, 1, 1)
        pose[:, :3, 3] += torch.rand(self.num_envs, 3, device=self.device) * 0.5 - 0.25
        self.cube.set_local_pose(pose=pose)

    def _step_action(self, action: EnvAction) -> EnvAction:
        self.robot.set_qpos(qpos=action)
        return action

    def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
        # You can also use `cube = self.sim.get_rigid_object("cube")` to access obj.
        # obs["cube_position"] = self.cube.get_local_pose()[:, :3]
        return obs


if __name__ == "__main__":
    import argparse
    import time

    parser = argparse.ArgumentParser(
        description="Demo for running a random reach environment."
    )
    parser.add_argument(
        "--num_envs", type=int, default=1, help="number of environments to run"
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cpu",
        help="device to run the environment on, e.g., 'cpu' or 'cuda'",
    )
    parser.add_argument("--headless", action="store_true", help="run in headless mode")
    args = parser.parse_args()

    env = gym.make(
        "RandomReach-v1",
        num_envs=args.num_envs,
        headless=args.headless,
        device=args.device,
    )

    for episode in range(10):
        print("Episode:", episode)
        env.reset()
        start_time = time.time()
        total_steps = 0

        for i in range(100):
            action = env.action_space.sample()
            action = torch.as_tensor(action, dtype=torch.float32, device=env.device)

            init_pose = env.robot_init_qpos
            init_pose = (
                torch.as_tensor(init_pose, dtype=torch.float32, device=env.device)
                .unsqueeze_(0)
                .repeat(env.num_envs, 1)
            )
            action = (
                init_pose
                + torch.rand_like(action, dtype=torch.float32, device=env.device) * 0.2
                - 0.1
            )

            obs, reward, done, truncated, info = env.step(action)
            total_steps += env.num_envs

        end_time = time.time()
        elapsed_time = end_time - start_time
        if elapsed_time > 0:
            fps = total_steps / elapsed_time
            print(f"Total steps: {total_steps}")
            print(f"Elapsed time: {elapsed_time:.2f} seconds")
            print(f"FPS: {fps:.2f}")
        else:
            print("Elapsed time is too short to calculate FPS.")
The Code Explained#
This tutorial demonstrates how to create a custom RL environment by inheriting from envs.BaseEnv. The environment implements a simple reach task where a robot arm tries to reach randomly positioned targets.
Environment Registration#
First, we register the environment with the Gymnasium registry using the utils.registration.register_env() decorator:
@register_env("RandomReach-v1", max_episode_steps=100, override=True)
class RandomReachEnv(BaseEnv):
The decorator parameters define:
Environment ID: "RandomReach-v1" - unique identifier for the environment
max_episode_steps: Maximum steps per episode (100 in this case)
override: Whether to override an existing environment with the same ID
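Once registered, the environment can be created by ID through the standard Gymnasium factory, with keyword arguments forwarded to the constructor. This is the same call the demo script at the bottom of the file uses; max_episode_steps is enforced by Gymnasium's TimeLimit wrapper:
import gymnasium as gym

# Create the registered environment by ID; the keyword arguments are
# forwarded to RandomReachEnv.__init__.
env = gym.make("RandomReach-v1", num_envs=2, headless=True, device="cpu")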
Environment Initialization#
The __init__ method configures the simulation environment and calls the parent constructor:
def __init__(
    self,
    num_envs=1,
    headless=False,
    device="cpu",
    **kwargs,
):
    env_cfg = EnvCfg(
        sim_cfg=SimulationManagerCfg(
            headless=headless, arena_space=2.0, sim_device=device
        ),
        num_envs=num_envs,
    )

    super().__init__(
        cfg=env_cfg,
        **kwargs,
    )
Key configuration options include:
num_envs: Number of parallel environments to run
headless: Whether to run without GUI (useful for training)
device: Computation device (“cpu” or “cuda”)
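These keyword arguments can also be passed when constructing the class directly, bypassing the registry. A minimal sketch, assuming the standard Gymnasium reset signature; note that direct construction skips the TimeLimit wrapper that gym.make adds:
# Direct construction, e.g. for debugging; no TimeLimit wrapper is applied.
env = RandomReachEnv(num_envs=4, headless=True, device="cuda")
obs, info = env.reset()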
Robot Setup#
The _setup_robot method loads and configures the robot for the environment:
def _setup_robot(self, **kwargs) -> Robot:
from embodichain.data import get_data_path
file_path = get_data_path("UniversalRobots/UR10/UR10.urdf")
robot: Robot = self.sim.add_robot(
cfg=RobotCfg(
uid="ur10",
fpath=file_path,
init_pos=(0, 0, 1),
init_qpos=self.robot_init_qpos,
)
)
qpos_limits = robot.body_data.qpos_limits[0].cpu().numpy()
self.single_action_space = gym.spaces.Box(
low=qpos_limits[:, 0], high=qpos_limits[:, 1], dtype=np.float32
)
return robot
This method demonstrates:
URDF Loading: Using data module to access robot URDF files
Robot Configuration: Setting initial position and joint configuration
Action Space Definition: Creating action space based on joint limits
The action space is automatically derived from the robot’s joint limits, ensuring actions stay within valid ranges.
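As a quick illustration, and assuming BaseEnv batches single_action_space into env.action_space across parallel environments (the demo script's usage suggests it does), sampled actions always respect the per-joint limits:
# Sample a batched action and check one row against the per-env space.
action = env.action_space.sample()  # assumed shape: (num_envs, num_joints)
assert env.single_action_space.contains(action[0])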
Scene Preparation#
The _prepare_scene() method adds additional objects to the simulation environment:
def _prepare_scene(self, **kwargs) -> None:
    size = 0.03
    # Create a kinematic cube object without collision.
    # Currently, we use this workaround for visualization purposes.
    self.cube: RigidObject = self.sim.add_rigid_object(
        cfg=RigidObjectCfg(
            uid="cube",
            shape=CubeCfg(size=[size, size, size]),
            attrs=RigidBodyAttributesCfg(enable_collision=False),
            init_pos=(0.0, 0.0, 0.5),
            body_type="kinematic",
        ),
    )
In this example, we add a kinematic cube that serves as a visual target. The cube is configured with:
No collision: enable_collision=False, for visualization only
Kinematic body: Can be moved programmatically without physics
Custom size: Small 3 cm cube for target visualization
Initial position: Initially placed at a fixed location
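For tasks that require physical contact with the target, the same API can add a collidable, physics-driven object instead. A hypothetical variant, where body_type="dynamic" is an assumption (the source only demonstrates "kinematic"):
# Hypothetical collidable object for contact-based tasks.
obstacle: RigidObject = self.sim.add_rigid_object(
    cfg=RigidObjectCfg(
        uid="obstacle",
        shape=CubeCfg(size=[0.05, 0.05, 0.05]),
        attrs=RigidBodyAttributesCfg(enable_collision=True),  # collisions on
        init_pos=(0.3, 0.0, 0.5),
        body_type="dynamic",  # assumed value; not shown in the source
    ),
)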
State Updates#
The _update_sim_state method is called at each simulation step to update object states:
def _update_sim_state(self, **kwargs) -> None:
pose = torch.eye(4, device=self.device)
pose = pose.unsqueeze_(0).repeat(self.num_envs, 1, 1)
pose[:, :3, 3] += torch.rand(self.num_envs, 3, device=self.device) * 0.5 - 0.25
self.cube.set_local_pose(pose=pose)
This method randomizes the cube's position: the translation of each 4x4 pose is offset by a uniform sample in [-0.25, 0.25] m per axis, and the update is applied to all parallel environments simultaneously.
Note that this method is called after action execution and the simulation update, but before observation collection. For more details, see envs.BaseEnv.step().
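For example, to keep the target within an easier-to-reach band, the same call can sample a narrower range, reusing only APIs that appear in the listing (here z stays at the local origin):
def _update_sim_state(self, **kwargs) -> None:
    pose = torch.eye(4, device=self.device).unsqueeze(0).repeat(self.num_envs, 1, 1)
    # Randomize only x and y within [-0.15, 0.15] m.
    pose[:, :2, 3] += torch.rand(self.num_envs, 2, device=self.device) * 0.3 - 0.15
    self.cube.set_local_pose(pose=pose)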
Action Execution#
The _step_action method applies actions to the robot:
def _step_action(self, action: EnvAction) -> EnvAction:
self.robot.set_qpos(qpos=action)
return action
In this simple environment, actions directly set joint positions. More complex environments might (one variant is sketched after this list):
Convert actions to joint torques or velocities
Apply action filtering or scaling
Implement inverse kinematics for end-effector control
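A minimal sketch of action filtering combined with position control: the action is treated as a bounded joint-space offset rather than an absolute target. The get_qpos() accessor is assumed here as the read counterpart of set_qpos() and may differ in your build:
def _step_action(self, action: EnvAction) -> EnvAction:
    # Hypothetical delta control: clamp the offset so each step moves the
    # joints by at most 0.05 rad, then command the resulting target.
    current = self.robot.get_qpos()  # assumed accessor, not shown in the source
    target = current + torch.clamp(action, -0.05, 0.05)
    self.robot.set_qpos(qpos=target)
    return target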
Observation Extension#
The default observations include the following keys:
robot: Robot proprioception data (joint positions, velocities, efforts)
sensor (optional): Data from any sensors (e.g., cameras)
The _extend_obs method allows you to add custom observations:
def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
# You can also use `cube = self.sim.get_rigid_object("cube")` to access obj.
# obs["cube_position"] = self.cube.get_local_pose()[:, :3]
return obs
While these lines are commented out in this example, you can add custom data such as (see the sketch after this list):
Object positions and orientations
Distance calculations
Custom sensor readings
Task-specific state information
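A sketch that enables the commented-out line and adds a hypothetical distance feature; the "ee_pos" key inside the robot observation is an assumption, not something the source guarantees:
def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
    # Expose the target position (this line exists, commented out, in the source).
    obs["cube_position"] = self.cube.get_local_pose()[:, :3]
    # Hypothetical distance feature; uncomment if your build provides "ee_pos".
    # obs["ee_to_cube"] = torch.linalg.norm(
    #     obs["robot"]["ee_pos"] - obs["cube_position"], dim=-1
    # )
    return obs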
The Code Execution#
To run the environment:
cd /path/to/embodichain
python scripts/tutorials/gym/random_reach.py
You can customize the execution with command-line options:
# Run multiple parallel environments
python scripts/tutorials/gym/random_reach.py --num_envs 4
# Run with GPU acceleration
python scripts/tutorials/gym/random_reach.py --device cuda
# Run in headless mode (no GUI)
python scripts/tutorials/gym/random_reach.py --headless
The script demonstrates:
Environment Creation: Using gym.make() with custom parameters
Episode Loop: Running multiple episodes with random actions
Performance Monitoring: Calculating frames per second (FPS)
Key Features Demonstrated#
This tutorial showcases several important features of EmbodiChain environments:
Gymnasium Integration: Full compatibility with the Gymnasium API
Parallel Environments: Running multiple environments simultaneously for efficient training
Robot Integration: Easy loading and control of robotic systems
Custom Objects: Adding and manipulating scene objects
Flexible Actions: Customizable action spaces and execution methods
Extensible Observations: Adding task-specific observation data