Commit c3b1f4f9 by yzh

first_amp

parent 90fb0c43
......@@ -57,7 +57,7 @@ runs
# other
*.egg-info
__pycache__
*.pt
*.swp
MUJOCO_LOG.TXT
\ No newline at end of file
The motion capture data is available only under the terms of the Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license.
https://creativecommons.org/licenses/by-nc/4.0/legalcode
https://github.com/sebastianstarke/AI4Animation
original_name new_name trim_start trim_end
D1_001_KAN01_001 dog_sit_stand00
D1_003_KAN01_001 dog_sit00
D1_004_KAN01_001 dog_sit_stand01
D1_005_KAN01_001 dog_sit_stand02
D1_006_KAN01_001 dog_sit_stand03
D1_006_KAN01_002 dog_sit_stand04
D1_007_KAN01_001** dog_walk00 1.53 (90)
D1_008_KAN01_001** dog_walk01 4.87 (290)
D1_008_KAN01_002** dog_walk02 6.60 (394)
D1_009_KAN01_001** dog_walk03
D1_009_KAN01_002** dog_walk04 3.90 (232)
D1_010_KAN01_001** dog_run00 6.65 (397)
D1_010_KAN01_002** dog_run01
D1_010_KAN01_003** dog_run02
D1_010_KAN01_004* dog_run03
D1_013_KAN01_001 dog_sit01
D1_025_KAN01_001** dog_walk05 3.37 (200) 9.10 (545)
D1_031_KAN01_001** dog_run04 8.15 (487) 11.95 (716)
D1_045_KAN01_001** dog_walk06 2.87 (170) 6.57 (392)
D1_047_KAN01_001** dog_turn00 5.47 (326) 12.47 (747)
D1_047z_KAN01_002* dog_walk_run00
D1_047z_KAN01_003 dog_hop00
D1_047z_KAN01_004 dog_hop01
D1_047z_KAN01_005 dog_walk07
D1_049_KAN01_001 dog_walk08
D1_053_KAN01_001** dog_walk09
D1_053_KAN01_002* dog_walk10
D1_055_KAN01_001 dog_walk_run01
D1_057_KAN01_001* dog_jump00 15.68 (940) 19.12 (1146)
D1_058_KAN01_001* dog_jump01 25.68 (1539) 29.20 (1751)
D1_059_KAN01_001 dog_beg00
D1_061_KAN01_001 dog_walk_run02
D1_061z_KAN01_002 dog_walk11
D1_061z_KAN01_003 dog_walk12
D1_071_KAN02_002 dog_sit02
D1_073_KAN02_002* dog_walk13
D1_086_KAN02_001 dog_beg01
D1_ex01_KAN01_001* dog_walk14
D1_ex02_KAN02_001 dog_sit_walk00
D1_ex03_KAN02_001 dog_sit_walk01
D1_ex03_KAN02_003 dog_walk15
D1_ex03_KAN02_006 dog_jump_down0 33.90 (2033) 36.10 (2165)
D1_ex03_KAN02_012 dog_walk16
D1_ex03_KAN02_013 dog_shake0
D1_ex03_KAN02_014 dog_walk17
D1_ex04_KAN02_001* dog_stepup_jump0 5.65 (337) 12.58 (754)
D1_ex04_KAN02_002* dog_stepup_jump1 4.20 (250) 8.38 (502)
D1_ex04_KAN02_003* dog_stepup_jump2
D1_ex04_KAN02_004* dog_stepup_jump3 7.98 (477)
D1_ex05_KAN02_001 dog_sit_walk02
D1_ex06_KAN02_001 dog_walk18
\ No newline at end of file
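In the table above, trim_start and trim_end appear to give a time in seconds followed by the corresponding frame index in parentheses; the ratios are consistent with a capture rate of roughly 60 fps, though the exact rate is inferred from the numbers rather than stated in the notes. A minimal sketch of the conversion under that assumption:

# Hypothetical helper (not part of the dataset notes): convert a trim time in
# seconds to a frame index, assuming ~60 fps capture. The table's own values
# suggest a rate slightly below 60 fps, so small offsets are expected.
CAPTURE_FPS = 60.0  # assumed

def trim_time_to_frame(seconds, fps=CAPTURE_FPS):
    return round(seconds * fps)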
This source diff could not be displayed because it is too large. You can view the blob instead. (repeated for 30 large files in this commit)
import numpy as np
from legged_gym import LEGGED_GYM_ROOT_DIR
VISUALIZE_RETARGETING = True
URDF_FILENAME = "{LEGGED_GYM_ROOT_DIR}/resources/robots/a1/urdf/a1.urdf".format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR)
OUTPUT_DIR = "{LEGGED_GYM_ROOT_DIR}/../datasets/mocap_motions_a1".format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR)
REF_POS_SCALE = 0.825 # scaling factor; if the retargeted pose hits joint limits, try reducing this value
INIT_POS = np.array([0, 0, 0.32]) # a1
INIT_ROT = np.array([0, 0, 0, 1.0])
SIM_TOE_JOINT_IDS = [6, 11, 16, 21]
SIM_HIP_JOINT_IDS = [2, 7, 12, 17]
SIM_ROOT_OFFSET = np.array([0, 0, -0.04])
SIM_TOE_OFFSET_LOCAL = [
np.array([0.0, -0.06, 0.0]),
np.array([0.0, 0.06, 0.0]),
np.array([0.0, -0.06, 0.0]),
np.array([0.0, 0.06, 0.0])
]
TOE_HEIGHT_OFFSET = 0.02
DEFAULT_JOINT_POSE = np.array([0.1, 0.8, -1.5,
0.1, 1.0, -1.5,
-0.1, 0.8, -1.5,
-0.10, 1.0, -1.5])
JOINT_DAMPING = [0.1, 0.05, 0.01,
0.1, 0.05, 0.01,
0.1, 0.05, 0.01,
0.1, 0.05, 0.01]
FORWARD_DIR_OFFSET = np.array([0, 0, 0])
FR_FOOT_NAME = "FR_foot"
FL_FOOT_NAME = "FL_foot"
HR_FOOT_NAME = "RR_foot"
HL_FOOT_NAME = "RL_foot"
MOCAP_MOTIONS = [
# Output motion name, input file, frame start, frame end, motion weight.
["pace0", "datasets/keypoint_datasets/ai4animation/dog_walk00_joint_pos.txt", 162, 201, 1],
["pace1", "datasets/keypoint_datasets/ai4animation/dog_walk00_joint_pos.txt", 201, 400, 1],
["pace2", "datasets/keypoint_datasets/ai4animation/dog_walk00_joint_pos.txt", 400, 600, 1],
["trot0", "datasets/keypoint_datasets/ai4animation/dog_walk03_joint_pos.txt", 448, 481, 1],
["trot1", "datasets/keypoint_datasets/ai4animation/dog_walk03_joint_pos.txt", 400, 600, 1],
["trot2", "datasets/keypoint_datasets/ai4animation/dog_run04_joint_pos.txt", 480, 663, 1],
["canter0", "datasets/keypoint_datasets/ai4animation/dog_run00_joint_pos.txt", 430, 480, 1],
["canter1", "datasets/keypoint_datasets/ai4animation/dog_run00_joint_pos.txt", 380, 430, 1],
["canter2", "datasets/keypoint_datasets/ai4animation/dog_run00_joint_pos.txt", 480, 566, 1],
["right_turn0", "datasets/keypoint_datasets/ai4animation/dog_walk09_joint_pos.txt", 1085, 1124, 1.5],
["right_turn1", "datasets/keypoint_datasets/ai4animation/dog_walk09_joint_pos.txt", 560, 670, 1.5],
["left_turn0", "datasets/keypoint_datasets/ai4animation/dog_walk09_joint_pos.txt", 2404, 2450, 1.5],
["left_turn1", "datasets/keypoint_datasets/ai4animation/dog_walk09_joint_pos.txt", 120, 220, 1.5]
]
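# Sketch (not the retargeting script itself, which is among the large diffs
# omitted above) of how the MOCAP_MOTIONS entries might be consumed: each
# entry names an output clip, a keypoint file, a frame range and a motion
# weight. np.loadtxt and its default whitespace delimiter are assumptions
# about the *_joint_pos.txt format.
def iter_mocap_clips():
    for name, path, frame_start, frame_end, weight in MOCAP_MOTIONS:
        keypoints = np.loadtxt(path)          # assumed plain-text keypoint rows
        clip = keypoints[frame_start:frame_end]
        yield name, clip, weight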
import numpy as np
from pybullet_utils import transformations
import pybullet
from rsl_rl.datasets import pose3d
POS_SIZE = 3
ROT_SIZE = 4
def get_root_pos(pose):
return pose[0:POS_SIZE]
def get_root_rot(pose):
return pose[POS_SIZE : (POS_SIZE + ROT_SIZE)]
def calc_heading(q):
"""Returns the heading of a rotation q, specified as a quaternion.
The heading represents the rotational component of q along the vertical
axis (z axis).
Args:
q: A quaternion that the heading is to be computed from.
Returns:
An angle representing the rotation about the z axis.
"""
ref_dir = np.array([1, 0, 0])
rot_dir = pose3d.QuaternionRotatePoint(ref_dir, q)
heading = np.arctan2(rot_dir[1], rot_dir[0])
return heading
def calc_heading_rot(q):
"""Return a quaternion representing the heading rotation of q along the vertical axis (z axis).
Args:
q: A quaternion that the heading is to be computed from.
Returns:
A quaternion representing the rotation about the z axis.
"""
heading = calc_heading(q)
q_heading = transformations.quaternion_about_axis(heading, [0, 0, 1])
return q_heading
def get_joint_limits(robot):
num_joints = pybullet.getNumJoints(robot)
joint_limit_low = []
joint_limit_high = []
for i in range(num_joints):
joint_info = pybullet.getJointInfo(robot, i)
joint_type = joint_info[2]
if (
joint_type == pybullet.JOINT_PRISMATIC
or joint_type == pybullet.JOINT_REVOLUTE
):
joint_limit_low.append(joint_info[8])
joint_limit_high.append(joint_info[9])
return joint_limit_low, joint_limit_high
def set_pose(robot, pose):
num_joints = pybullet.getNumJoints(robot)
root_pos = get_root_pos(pose)
root_rot = get_root_rot(pose)
pybullet.resetBasePositionAndOrientation(robot, root_pos, root_rot)
for j in range(num_joints):
j_info = pybullet.getJointInfo(robot, j)
j_state = pybullet.getJointStateMultiDof(robot, j)
j_pose_idx = j_info[3]
j_pose_size = len(j_state[0])
j_vel_size = len(j_state[1])
if j_pose_size > 0:
j_pose = pose[j_pose_idx : (j_pose_idx + j_pose_size)]
j_vel = np.zeros(j_vel_size)
pybullet.resetJointStateMultiDof(robot, j, j_pose, j_vel)
def output_motion(frames, out_filename, motion_weight, frame_duration):
with open(out_filename, "w") as f:
f.write("{\n")
f.write('"LoopMode": "Wrap",\n')
f.write('"FrameDuration": ' + str(frame_duration) + ",\n")
f.write('"EnableCycleOffsetPosition": true,\n')
f.write('"EnableCycleOffsetRotation": true,\n')
f.write('"MotionWeight": ' + str(motion_weight) + ",\n")
f.write("\n")
f.write('"Frames":\n')
f.write("[")
for i in range(frames.shape[0]):
curr_frame = frames[i]
if i != 0:
f.write(",")
f.write("\n [")
for j in range(frames.shape[1]):
curr_val = curr_frame[j]
if j != 0:
f.write(", ")
f.write("%.5f" % curr_val)
f.write("]")
f.write("\n]")
f.write("\n}")
return
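# Illustrative usage of output_motion (file name, frame values and timing are
# made up for the example, not taken from this commit):
def example_output_motion():
    """Write a tiny 2-frame, 3-value clip at 50 Hz to example_motion.txt."""
    dummy_frames = np.array([[0.0, 0.1, 0.2],
                             [0.1, 0.2, 0.3]])
    output_motion(dummy_frames, "example_motion.txt",
                  motion_weight=1.0, frame_duration=0.02)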
###########################
## Visualization methods ##
###########################
def set_linear_vel_pos(linear_vel, robot_idx, unique_id=None):
ray_start = np.array([0.0, 0.0, 0.25])
ray_end = ray_start + linear_vel / 3.0
if unique_id is not None:
return pybullet.addUserDebugLine(
lineFromXYZ=ray_start,
lineToXYZ=ray_end,
lineWidth=4,
replaceItemUniqueId=unique_id,
lineColorRGB=[1, 0, 0],
parentObjectUniqueId=robot_idx,
)
else:
return pybullet.addUserDebugLine(
lineFromXYZ=ray_start,
lineToXYZ=ray_end,
lineWidth=4,
lineColorRGB=[1, 0, 0],
parentObjectUniqueId=robot_idx,
)
def set_angular_vel_pos(angular_vel, robot_idx, unique_id=None):
ray_start = np.array([0.0, 0.0, 0.0])
ray_end = ray_start + angular_vel
if unique_id is not None:
return pybullet.addUserDebugLine(
lineFromXYZ=ray_start,
lineToXYZ=ray_end,
lineWidth=4,
replaceItemUniqueId=unique_id,
lineColorRGB=[1, 0, 0],
parentObjectUniqueId=robot_idx,
)
else:
return pybullet.addUserDebugLine(
lineFromXYZ=ray_start,
lineToXYZ=ray_end,
lineWidth=4,
lineColorRGB=[1, 0, 0],
parentObjectUniqueId=robot_idx,
)
def build_markers(num_markers, special_idx, special_colors):
marker_radius = 0.01
markers = []
for i in range(num_markers):
if i in special_idx:
color = special_colors[special_idx.index(i)]
else:
color = [1, 1, 1, 1]
virtual_shape_id = pybullet.createVisualShape(
shapeType=pybullet.GEOM_SPHERE, radius=marker_radius, rgbaColor=color
)
body_id = pybullet.createMultiBody(
baseMass=0,
baseCollisionShapeIndex=-1,
baseVisualShapeIndex=virtual_shape_id,
basePosition=[0, 0, 0],
useMaximalCoordinates=True,
)
markers.append(body_id)
return markers
def update_marker_pos(marker_pos, marker_ids):
num_markers = len(marker_ids)
assert num_markers == marker_pos.shape[0]
for i in range(num_markers):
curr_id = marker_ids[i]
curr_pos = marker_pos[i]
pybullet.resetBasePositionAndOrientation(curr_id, curr_pos, [0, 0, 0, 1])
return marker_ids
def build_coordinate_frame():
dir_line_1 = pybullet.addUserDebugLine([0, 0, 0], [1, 1, 1], [1, 0, 0])
dir_line_2 = pybullet.addUserDebugLine([0, 0, 0], [1, 1, 1], [1, 0, 0])
dir_line_3 = pybullet.addUserDebugLine([0, 0, 0], [1, 1, 1], [1, 0, 0])
return [dir_line_1, dir_line_2, dir_line_3]
def update_coordinate_frame(lines, origin, H, R, scale=0.25):
for i in range(3):
color = [0, 0, 0]
color[i] = 1
lines[i] = pybullet.addUserDebugLine(
lineFromXYZ=origin,
lineToXYZ=H + R.dot(np.array(color)) * scale,
replaceItemUniqueId=lines[i],
lineColorRGB=color,
)
return lines
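# Sketch of how the marker helpers above could drive a simple keypoint
# playback. A connected pybullet client and a (T, num_markers, 3) keypoint
# array are assumed; the number of markers and the colors are illustrative.
def visualize_keypoint_sequence(motion_keypoints, num_markers=17):
    markers = build_markers(num_markers, special_idx=[0],
                            special_colors=[[1, 0, 0, 1]])
    for frame_keypoints in motion_keypoints:   # shape (num_markers, 3)
        update_marker_pos(frame_keypoints, markers)
        pybullet.stepSimulation()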
import numpy as np
import torch
from typing import Tuple
_EPS = np.finfo(float).eps * 4.0
class RunningMeanStd:
def __init__(self, epsilon: float = 1e-4, shape: Tuple[int, ...] = ()):
"""
Calculates the running mean and std of a data stream
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
:param epsilon: helps with arithmetic issues
:param shape: the shape of the data stream's output
"""
self.mean = np.zeros(shape, np.float64)
self.var = np.ones(shape, np.float64)
self.count = epsilon
def update(self, arr: np.ndarray) -> None:
batch_mean = np.mean(arr, axis=0)
batch_var = np.var(arr, axis=0)
batch_count = arr.shape[0]
self.update_from_moments(batch_mean, batch_var, batch_count)
def update_from_moments(self, batch_mean: np.ndarray, batch_var: np.ndarray, batch_count: int) -> None:
delta = batch_mean - self.mean
tot_count = self.count + batch_count
new_mean = self.mean + delta * batch_count / tot_count
m_a = self.var * self.count
m_b = batch_var * batch_count
m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count)
new_var = m_2 / (self.count + batch_count)
new_count = batch_count + self.count
self.mean = new_mean
self.var = new_var
self.count = new_count
class Normalizer(RunningMeanStd):
def __init__(self, input_dim, epsilon=1e-4, clip_obs=10.0):
super().__init__(shape=input_dim)
self.epsilon = epsilon
self.clip_obs = clip_obs
def normalize(self, input):
return np.clip((input - self.mean) / np.sqrt(self.var + self.epsilon), -self.clip_obs, self.clip_obs)
def normalize_torch(self, input, device):
mean_torch = torch.tensor(self.mean, device=device, dtype=torch.float32)
std_torch = torch.sqrt(torch.tensor(self.var + self.epsilon, device=device, dtype=torch.float32))
return torch.clamp((input - mean_torch) / std_torch, -self.clip_obs, self.clip_obs)
def update_normalizer(self, rollouts, expert_loader):
policy_data_generator = rollouts.feed_forward_generator_amp(None, mini_batch_size=expert_loader.batch_size)
expert_data_generator = expert_loader.dataset.feed_forward_generator_amp(expert_loader.batch_size)
for expert_batch, policy_batch in zip(expert_data_generator, policy_data_generator):
self.update(torch.vstack(tuple(policy_batch) + tuple(expert_batch)).cpu().numpy())
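# Illustrative standalone use of Normalizer outside the AMP loop: accumulate
# running statistics from a few batches, then normalize fresh data (clipped to
# +/- clip_obs). Shapes and random data are placeholders.
def _normalizer_example():
    norm = Normalizer(input_dim=(4,))
    for _ in range(2):
        norm.update(np.random.randn(128, 4))
    return norm.normalize(np.random.randn(32, 4))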
def quaternion_slerp(q0, q1, fraction, spin=0, shortestpath=True):
"""Batch quaternion spherical linear interpolation."""
out = torch.zeros_like(q0)
zero_mask = torch.isclose(fraction, torch.zeros_like(fraction)).squeeze()
ones_mask = torch.isclose(fraction, torch.ones_like(fraction)).squeeze()
out[zero_mask] = q0[zero_mask]
out[ones_mask] = q1[ones_mask]
d = torch.sum(q0 * q1, dim=-1, keepdim=True)
dist_mask = (torch.abs(torch.abs(d) - 1.0) < _EPS).squeeze()
out[dist_mask] = q0[dist_mask]
if shortestpath:
d_old = torch.clone(d)
d = torch.where(d_old < 0, -d, d)
q1 = torch.where(d_old < 0, -q1, q1)
angle = torch.acos(d) + spin * torch.pi
angle_mask = (torch.abs(angle) < _EPS).squeeze()
out[angle_mask] = q0[angle_mask]
final_mask = torch.logical_or(zero_mask, ones_mask)
final_mask = torch.logical_or(final_mask, dist_mask)
final_mask = torch.logical_or(final_mask, angle_mask)
final_mask = torch.logical_not(final_mask)
isin = 1.0 / torch.sin(angle)
q0 *= torch.sin((1.0 - fraction) * angle) * isin
q1 *= torch.sin(fraction * angle) * isin
q0 += q1
out[final_mask] = q0[final_mask]
return out
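# Quick sanity check for quaternion_slerp (illustrative): interpolating
# halfway between the identity and a 90-degree rotation about z should give
# ~45 degrees about z, i.e. approximately [0, 0, 0.3827, 0.9239] in
# (x, y, z, w) order.
def _quaternion_slerp_example():
    q0 = torch.tensor([[0.0, 0.0, 0.0, 1.0],
                       [0.0, 0.0, 0.0, 1.0]])
    q1 = torch.tensor([[0.0, 0.0, 0.70710678, 0.70710678],
                       [0.0, 0.0, 0.70710678, 0.70710678]])
    fraction = torch.tensor([[0.5], [0.5]])
    return quaternion_slerp(q0, q1, fraction)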
def split_and_pad_trajectories(tensor, dones):
""" Splits trajectories at done indices. Then concatenates them and padds with zeros up to the length og the longest trajectory.
Returns masks corresponding to valid parts of the trajectories
Example:
Input: [ [a1, a2, a3, a4 | a5, a6],
[b1, b2 | b3, b4, b5 | b6]
]
Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
[a5, a6, 0, 0], | [True, True, False, False],
[b1, b2, 0, 0], | [True, True, False, False],
[b3, b4, b5, 0], | [True, True, True, False],
[b6, 0, 0, 0] | [True, False, False, False],
] | ]
Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
"""
dones = dones.clone()
dones[-1] = 1
# Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
flat_dones = dones.transpose(1, 0).reshape(-1, 1)
# Get length of trajectory by counting the number of successive not done elements
done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
trajectory_lengths = done_indices[1:] - done_indices[:-1]
trajectory_lengths_list = trajectory_lengths.tolist()
# Extract the individual trajectories
trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list)
padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
return padded_trajectories, trajectory_masks
def unpad_trajectories(trajectories, masks):
""" Does the inverse operation of split_and_pad_trajectories()
"""
# Need to transpose before and after the masking to have proper reshaping
return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0)
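# Illustrative use of split_and_pad_trajectories with the done pattern from
# the docstring above: a (time=6, envs=2, dim=1) buffer yields 5 trajectories
# padded to length 4 plus the matching validity masks.
def _split_and_pad_example():
    tensor = torch.arange(12, dtype=torch.float32).reshape(6, 2, 1)
    dones = torch.zeros(6, 2)
    dones[3, 0] = 1   # env 0 ends a trajectory at its 4th step
    dones[1, 1] = 1   # env 1 ends at its 2nd step
    dones[4, 1] = 1   # ... and again at its 5th step
    return split_and_pad_trajectories(tensor, dones)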
from typing import Tuple
import numpy as np
import torch
_EPS = np.finfo(float).eps * 4.0
class RunningMeanStd(object):
def __init__(self, epsilon: float = 1e-4, shape: Tuple[int, ...] = ()):
"""
Calculates the running mean and std of a data stream
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
:param epsilon: helps with arithmetic issues
:param shape: the shape of the data stream's output
"""
self.mean = np.zeros(shape, np.float64)
self.var = np.ones(shape, np.float64)
self.count = epsilon
def update(self, arr: np.ndarray) -> None:
batch_mean = np.mean(arr, axis=0)
batch_var = np.var(arr, axis=0)
batch_count = arr.shape[0]
self.update_from_moments(batch_mean, batch_var, batch_count)
def update_from_moments(self, batch_mean: np.ndarray, batch_var: np.ndarray, batch_count: int) -> None:
delta = batch_mean - self.mean
tot_count = self.count + batch_count
new_mean = self.mean + delta * batch_count / tot_count
m_a = self.var * self.count
m_b = batch_var * batch_count
m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count)
new_var = m_2 / (self.count + batch_count)
new_count = batch_count + self.count
self.mean = new_mean
self.var = new_var
self.count = new_count
class Normalizer(RunningMeanStd):
def __init__(self, input_dim, epsilon=1e-4, clip_obs=10.0):
super().__init__(shape=input_dim)
self.epsilon = epsilon
self.clip_obs = clip_obs
def normalize(self, input):
return np.clip(
(input - self.mean) / np.sqrt(self.var + self.epsilon),
-self.clip_obs, self.clip_obs)
def normalize_torch(self, input, device):
mean_torch = torch.tensor(
self.mean, device=device, dtype=torch.float32)
std_torch = torch.sqrt(torch.tensor(
self.var + self.epsilon, device=device, dtype=torch.float32))
return torch.clamp(
(input - mean_torch) / std_torch, -self.clip_obs, self.clip_obs)
def update_normalizer(self, rollouts, expert_loader):
policy_data_generator = rollouts.feed_forward_generator_amp(
None, mini_batch_size=expert_loader.batch_size)
expert_data_generator = expert_loader.dataset.feed_forward_generator_amp(
expert_loader.batch_size)
for expert_batch, policy_batch in zip(expert_data_generator, policy_data_generator):
self.update(
torch.vstack(tuple(policy_batch) + tuple(expert_batch)).cpu().numpy())
def quaternion_slerp(q0, q1, fraction, spin=0, shortestpath=True):
"""Batch quaternion spherical linear interpolation."""
out = torch.zeros_like(q0)
zero_mask = torch.isclose(fraction, torch.zeros_like(fraction)).squeeze()
ones_mask = torch.isclose(fraction, torch.ones_like(fraction)).squeeze()
out[zero_mask] = q0[zero_mask]
out[ones_mask] = q1[ones_mask]
d = torch.sum(q0 * q1, dim=-1, keepdim=True)
dist_mask = (torch.abs(torch.abs(d) - 1.0) < _EPS).squeeze()
out[dist_mask] = q0[dist_mask]
if shortestpath:
d_old = torch.clone(d)
d = torch.where(d_old < 0, -d, d)
q1 = torch.where(d_old < 0, -q1, q1)
angle = torch.acos(d) + spin * torch.pi
angle_mask = (torch.abs(angle) < _EPS).squeeze()
out[angle_mask] = q0[angle_mask]
final_mask = torch.logical_or(zero_mask, ones_mask)
final_mask = torch.logical_or(final_mask, dist_mask)
final_mask = torch.logical_or(final_mask, angle_mask)
final_mask = torch.logical_not(final_mask)
isin = 1.0 / torch.sin(angle)
q0 *= torch.sin((1.0 - fraction) * angle) * isin
q1 *= torch.sin(fraction * angle) * isin
q0 += q1
out[final_mask] = q0[final_mask]
return out
# coding=utf-8
# Copyright 2020 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for processing motion clips."""
import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)
import numpy as np
from . import pose3d
from .pose3d import *
from pybullet_utils import transformations
def standardize_quaternion(q):
"""Returns a quaternion where q.w >= 0 to remove redundancy due to q = -q.
Args:
q: A quaternion to be standardized.
Returns:
A quaternion with q.w >= 0.
"""
if q[-1] < 0:
q = -q
return q
def normalize_rotation_angle(theta):
"""Returns a rotation angle normalized between [-pi, pi].
Args:
theta: angle of rotation (radians).
Returns:
An angle of rotation normalized between [-pi, pi].
"""
norm_theta = theta
if np.abs(norm_theta) > np.pi:
norm_theta = np.fmod(norm_theta, 2 * np.pi)
if norm_theta >= 0:
norm_theta += -2 * np.pi
else:
norm_theta += 2 * np.pi
return norm_theta
def calc_heading(q):
"""Returns the heading of a rotation q, specified as a quaternion.
The heading represents the rotational component of q along the vertical
axis (z axis).
Args:
q: A quaternion that the heading is to be computed from.
Returns:
An angle representing the rotation about the z axis.
"""
ref_dir = np.array([1, 0, 0])
rot_dir = pose3d.QuaternionRotatePoint(ref_dir, q)
heading = np.arctan2(rot_dir[1], rot_dir[0])
return heading
def calc_heading_rot(q):
"""Return a quaternion representing the heading rotation of q along the vertical axis (z axis).
Args:
q: A quaternion that the heading is to be computed from.
Returns:
A quaternion representing the rotation about the z axis.
"""
heading = calc_heading(q)
q_heading = transformations.quaternion_about_axis(heading, [0, 0, 1])
return q_heading
# coding=utf-8
# Copyright 2020 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for 3D pose conversion."""
import math
import numpy as np
from pybullet_utils import transformations
VECTOR3_0 = np.zeros(3, dtype=np.float64)
VECTOR3_1 = np.ones(3, dtype=np.float64)
VECTOR3_X = np.array([1, 0, 0], dtype=np.float64)
VECTOR3_Y = np.array([0, 1, 0], dtype=np.float64)
VECTOR3_Z = np.array([0, 0, 1], dtype=np.float64)
# QUATERNION_IDENTITY is the multiplicative identity 1.0 + 0i + 0j + 0k.
# When interpreted as a rotation, it is the identity rotation.
QUATERNION_IDENTITY = np.array([0.0, 0.0, 0.0, 1.0], dtype=np.float64)
def Vector3RandomNormal(sigma, mu=VECTOR3_0):
"""Returns a random 3D vector from a normal distribution.
Each component is selected independently from a normal distribution.
Args:
sigma: Scale (or stddev) of distribution for all variables.
mu: Mean of distribution for each variable.
Returns:
A 3D vector in a numpy array.
"""
random_v3 = np.random.normal(scale=sigma, size=3) + mu
return random_v3
def Vector3RandomUniform(low=VECTOR3_0, high=VECTOR3_1):
"""Returns a 3D vector selected uniformly from the input box.
Args:
low: The min-value corner of the box.
high: The max-value corner of the box.
Returns:
A 3D vector in a numpy array.
"""
random_x = np.random.uniform(low=low[0], high=high[0])
random_y = np.random.uniform(low=low[1], high=high[1])
random_z = np.random.uniform(low=low[2], high=high[2])
return np.array([random_x, random_y, random_z])
def Vector3RandomUnit():
"""Returns a random 3D vector with unit length.
Generates a 3D vector selected uniformly from the unit sphere.
Returns:
A normalized 3D vector in a numpy array.
"""
longitude = np.random.uniform(low=-math.pi, high=math.pi)
sin_latitude = np.random.uniform(low=-1.0, high=1.0)
cos_latitude = math.sqrt(1.0 - sin_latitude * sin_latitude)
x = math.cos(longitude) * cos_latitude
y = math.sin(longitude) * cos_latitude
z = sin_latitude
return np.array([x, y, z], dtype=np.float64)
def QuaternionNormalize(q):
"""Normalizes the quaternion to length 1.
Divides the quaternion by its magnitude. If the magnitude is too
small, returns the quaternion identity value (1.0).
Args:
q: A quaternion to be normalized.
Raises:
ValueError: If input quaternion has length near zero.
Returns:
A quaternion with magnitude 1 in a numpy array [x, y, z, w].
"""
q_norm = np.linalg.norm(q)
if np.isclose(q_norm, 0.0):
raise ValueError(
'Quaternion may not be zero in QuaternionNormalize: |q| = %f, q = %s' %
(q_norm, q))
return q / q_norm
def QuaternionFromAxisAngle(axis, angle):
"""Returns a quaternion that generates the given axis-angle rotation.
Returns the quaternion: sin(angle/2) * axis + cos(angle/2).
Args:
axis: Axis of rotation, a 3D vector in a numpy array.
angle: The angle of rotation (radians).
Raises:
ValueError: If input axis is not a normalizable 3D vector.
Returns:
A unit quaternion in a numpy array.
"""
if len(axis) != 3:
raise ValueError('Axis vector should have three components: %s' % axis)
axis_norm = np.linalg.norm(axis)
if np.isclose(axis_norm, 0.0):
raise ValueError('Axis vector may not have zero length: |v| = %f, v = %s' %
(axis_norm, axis))
half_angle = angle * 0.5
q = np.zeros(4, dtype=np.float64)
q[0:3] = axis
q[0:3] *= math.sin(half_angle) / axis_norm
q[3] = math.cos(half_angle)
return q
def QuaternionToAxisAngle(quat, default_axis=VECTOR3_Z, direction_axis=None):
"""Calculates axis and angle of rotation performed by a quaternion.
Calculates the axis and angle of the rotation performed by the quaternion.
The quaternion should have four values and be normalized.
Args:
quat: Unit quaternion in a numpy array.
default_axis: 3D vector axis used if the rotation is near to zero. Without
this default, small rotations would result in an exception. It is
reasonable to use a default axis for tiny rotations, because zero angle
rotations about any axis are equivalent.
direction_axis: Used to disambiguate rotation directions. If the
direction_axis is specified, the axis of the rotation will be chosen such
that its inner product with the direction_axis is non-negative.
Raises:
ValueError: If quat is not a normalized quaternion.
Returns:
axis: Axis of rotation.
angle: Angle in radians.
"""
if len(quat) != 4:
raise ValueError(
'Quaternion should have four components [x, y, z, w]: %s' % quat)
if not np.isclose(1.0, np.linalg.norm(quat)):
raise ValueError('Quaternion should have unit length: |q| = %f, q = %s' %
(np.linalg.norm(quat), quat))
axis = quat[:3].copy()
axis_norm = np.linalg.norm(axis)
min_axis_norm = 1e-8
if axis_norm < min_axis_norm:
axis = default_axis
if len(default_axis) != 3:
raise ValueError('Axis vector should have three components: %s' % axis)
if not np.isclose(np.linalg.norm(axis), 1.0):
raise ValueError('Axis vector should have unit length: |v| = %f, v = %s' %
(np.linalg.norm(axis), axis))
else:
axis /= axis_norm
sin_half_angle = axis_norm
if direction_axis is not None and np.inner(axis, direction_axis) < 0:
sin_half_angle = -sin_half_angle
axis = -axis
cos_half_angle = quat[3]
half_angle = math.atan2(sin_half_angle, cos_half_angle)
angle = half_angle * 2
return axis, angle
def QuaternionRandomRotation(max_angle=math.pi):
"""Creates a random small rotation around a random axis.
Generates a small rotation with the axis vector selected uniformly
from the unit sphere and an angle selected from a uniform
distribution over [0, max_angle].
If the max_angle is not specified, the rotation should be selected
uniformly over all possible rotation angles.
Args:
max_angle: The maximum angle of rotation (radians).
Returns:
A unit quaternion in a numpy array.
"""
angle = np.random.uniform(low=0, high=max_angle)
axis = Vector3RandomUnit()
return QuaternionFromAxisAngle(axis, angle)
def QuaternionRotatePoint(point, quat):
"""Performs a rotation by quaternion.
Rotate the point by the quaternion using quaternion multiplication,
(q * p * q^-1), without constructing the rotation matrix.
Args:
point: The point to be rotated.
quat: The rotation represented as a quaternion [x, y, z, w].
Returns:
A 3D vector in a numpy array.
"""
q_point = np.array([point[0], point[1], point[2], 0.0])
quat_inverse = transformations.quaternion_inverse(quat)
q_point_rotated = transformations.quaternion_multiply(
transformations.quaternion_multiply(quat, q_point), quat_inverse)
return q_point_rotated[:3]
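# Small self-check (illustrative): rotating the x axis by 90 degrees about z
# with the helpers above should give approximately the y axis.
def _quaternion_rotate_point_example():
    q = QuaternionFromAxisAngle(VECTOR3_Z, math.pi / 2)
    return QuaternionRotatePoint(VECTOR3_X, q)   # ~[0, 1, 0]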
def IsRotationMatrix(m):
"""Returns true if the 3x3 submatrix represents a rotation.
Args:
m: A transformation matrix.
Raises:
ValueError: If input is not a matrix of size at least 3x3.
Returns:
True if the 3x3 submatrix is a rotation (orthogonal).
"""
if len(m.shape) != 2 or m.shape[0] < 3 or m.shape[1] < 3:
raise ValueError('Matrix should be 3x3 or 4x4: %s\n %s' % (m.shape, m))
rot = m[:3, :3]
eye = np.matmul(rot, np.transpose(rot))
return np.isclose(eye, np.identity(3), atol=1e-4).all()
# def ZAxisAlignedRobotPoseTool(robot_pose_tool):
# """Returns the current gripper pose rotated for alignment with the z-axis.
# Args:
# robot_pose_tool: a pose3d.Pose3d() instance.
# Returns:
# An instance of pose.Transform representing the current gripper pose
# rotated for alignment with the z-axis.
# """
# # Align the current pose to the z-axis.
# robot_pose_tool.quaternion = transformations.quaternion_multiply(
# RotationBetween(
# robot_pose_tool.matrix4x4[0:3, 0:3].dot(np.array([0, 0, 1])),
# np.array([0.0, 0.0, -1.0])), robot_pose_tool.quaternion)
# return robot_pose_tool
# def RotationBetween(a_translation_b, a_translation_c):
# """Computes the rotation from one vector to another.
# The computed rotation has the property that:
# a_translation_c = a_rotation_b_to_c * a_translation_b
# Args:
# a_translation_b: vec3, vector to rotate from
# a_translation_c: vec3, vector to rotate to
# Returns:
# a_rotation_b_to_c: new Orientation
# """
# rotation = rotation3.Rotation3.rotation_between(
# a_translation_b, a_translation_c, err_msg='RotationBetween')
# return rotation.quaternion.xyzw
import torch
import torch.nn as nn
from torch import autograd
class AMPDiscriminator(nn.Module):
def __init__(
self, input_dim, amp_reward_coef, hidden_layer_sizes, device, task_reward_lerp=0.0):
super(AMPDiscriminator, self).__init__()
self.device = device
self.input_dim = input_dim
self.amp_reward_coef = amp_reward_coef
amp_layers = []
curr_in_dim = input_dim
for hidden_dim in hidden_layer_sizes:
amp_layers.append(nn.Linear(curr_in_dim, hidden_dim))
amp_layers.append(nn.LeakyReLU())
curr_in_dim = hidden_dim
self.trunk = nn.Sequential(*amp_layers).to(device)
self.amp_linear = nn.Linear(hidden_layer_sizes[-1], 1).to(device)
self.trunk.train()
self.amp_linear.train()
self.task_reward_lerp = task_reward_lerp
def forward(self, x):
h = self.trunk(x)
d = self.amp_linear(h)
return d
def compute_grad_pen(self,
expert_state,
expert_next_state,
lambda_=10):
expert_data = torch.cat([expert_state, expert_next_state], dim=-1)
expert_data.requires_grad = True
disc = self.amp_linear(self.trunk(expert_data))
ones = torch.ones(disc.size(), device=disc.device)
grad = autograd.grad(
outputs=disc, inputs=expert_data,
grad_outputs=ones, create_graph=True,
retain_graph=True, only_inputs=True)[0]
# Enforce that the grad norm approaches 0.
grad_pen = lambda_ * (grad.norm(2, dim=1) - 0).pow(2).mean()
return grad_pen
def predict_amp_reward(
self, state, next_state, task_reward, normalizer=None):
with torch.no_grad():
self.eval()
if normalizer is not None:
state = normalizer.normalize_torch(state, self.device)
next_state = normalizer.normalize_torch(next_state, self.device)
d = self.amp_linear(self.trunk(torch.cat([state, next_state], dim=-1)))
# reward = self.amp_reward_coef * torch.clamp(1 - (1/4) * torch.square(torch.zeros_like(d)), min=0)
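# Least-squares-GAN style reward: the discriminator output d is trained toward
# +1 on reference transitions, so 1 - 0.25 * (d - 1)^2 is close to 1 when the
# policy transition looks like the mocap data and is clamped to 0 otherwise,
# then scaled by amp_reward_coef.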
reward = self.amp_reward_coef * torch.clamp(1 - (1/4) * torch.square(d - 1), min=0)
amp_reward = reward.clone()
if self.task_reward_lerp > 0:
reward = self._lerp_reward(reward, task_reward.unsqueeze(-1))
weighted_style_reward = (1.0 - self.task_reward_lerp) * amp_reward.squeeze()
weighted_task_reward = self.task_reward_lerp * task_reward.squeeze()
self.train()
return reward.squeeze(), weighted_style_reward, weighted_task_reward
def _lerp_reward(self, disc_r, task_r):
r = (1.0 - self.task_reward_lerp) * disc_r + self.task_reward_lerp * task_r
return r
\ No newline at end of file
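# Sketch of standalone AMPDiscriminator usage; the observation size, batch
# size and hyper-parameters below are illustrative, not the values used by
# this commit's training configs.
def _amp_discriminator_example(device="cpu"):
    obs_dim = 30                                    # assumed per-step AMP observation size
    disc = AMPDiscriminator(
        input_dim=obs_dim * 2,                      # discriminator sees (s, s') pairs
        amp_reward_coef=2.0,
        hidden_layer_sizes=[1024, 512],
        device=device,
        task_reward_lerp=0.3)
    s = torch.randn(8, obs_dim)
    s_next = torch.randn(8, obs_dim)
    task_reward = torch.randn(8)
    # Blended reward plus its style/task components, as used by the runner below.
    return disc.predict_amp_reward(s, s_next, task_reward, normalizer=None)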
......@@ -41,7 +41,9 @@ from .dh_ppo import DHPPO
from .actor_critic_dh import ActorCriticDH
from humanoid.algo.vec_env import VecEnv
from torch.utils.tensorboard import SummaryWriter
from ..loadamp.motion_loader import AMPLoader
from ..loadamp.amp_utils import Normalizer
from .amp_discriminator import AMPDiscriminator
class DHOnPolicyRunner:
def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"):
......@@ -70,8 +72,24 @@ class DHOnPolicyRunner:
self.env.num_short_obs, self.env.num_single_obs, num_critic_obs, self.env.num_actions, **self.policy_cfg
).to(self.device)
amp_data = AMPLoader(
device,
motion_files=self.cfg["amp_motion_files"],
time_between_frames=self.env.dt,
preload_transitions=True,
num_preload_transitions=train_cfg['runner']['amp_num_preload_transitions'])
amp_normalizer = Normalizer(amp_data.observation_dim)
discriminator = AMPDiscriminator(
amp_data.observation_dim * 2,
train_cfg['runner']['amp_reward_coef'],
train_cfg['runner']['amp_discr_hidden_dims'], device,
train_cfg['runner']['amp_task_reward_lerp']).to(self.device)
min_std = (
torch.tensor(self.cfg["min_normalized_std"], device=self.device) *
(torch.abs(self.env.dof_pos_limits[:, 1] - self.env.dof_pos_limits[:, 0])))
alg_class = eval(self.cfg["algorithm_class_name"]) # PPO
self.alg: DHPPO = alg_class(actor_critic, device=self.device, **self.alg_cfg)
self.alg: DHPPO = alg_class(actor_critic, discriminator, amp_data, amp_normalizer, min_std, device=self.device, **self.alg_cfg)
self.num_steps_per_env = self.cfg["num_steps_per_env"]
self.save_interval = self.cfg["save_interval"]
......@@ -106,17 +124,26 @@ class DHOnPolicyRunner:
privileged_obs = self.env.get_privileged_observations()
critic_obs = privileged_obs if privileged_obs is not None else obs
obs, critic_obs = obs.to(self.device), critic_obs.to(self.device)
self.alg.actor_critic.train() # switch to train mode (for dropout for example)
amp_obs = self.env.get_amp_observations()
amp_obs = amp_obs.to(self.device)
self.alg.actor_critic.train() # switch to train mode (for dropout for example)
self.alg.discriminator.train()
ep_infos = []
rewbuffer = deque(maxlen=100)
lenbuffer = deque(maxlen=100)
amp_rewbuffer = deque(maxlen=100)
task_rewbuffer = deque(maxlen=100)
cur_reward_sum = torch.zeros(
self.env.num_envs, dtype=torch.float, device=self.device
)
cur_episode_length = torch.zeros(
self.env.num_envs, dtype=torch.float, device=self.device
)
cur_amp_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
cur_task_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
tot_iter = self.current_learning_iteration + num_learning_iterations
......@@ -128,30 +155,49 @@ class DHOnPolicyRunner:
# Rollout
with torch.inference_mode():
for i in range(self.num_steps_per_env):
actions = self.alg.act(obs, critic_obs)
obs, privileged_obs, rewards, dones, infos = self.env.step(actions)
actions = self.alg.act(obs, critic_obs, amp_obs)
obs, privileged_obs, rewards, dones, infos, reset_env_ids, terminal_amp_states = self.env.step(actions)
critic_obs = privileged_obs if privileged_obs is not None else obs
next_amp_obs = self.env.get_amp_observations()
obs, critic_obs, rewards, dones = (
obs.to(self.device),
critic_obs.to(self.device),
rewards.to(self.device),
dones.to(self.device),
)
self.alg.process_env_step(rewards, dones, infos)
next_amp_obs = next_amp_obs.to(self.device)
# Account for terminal states.
next_amp_obs_with_term = torch.clone(next_amp_obs)
next_amp_obs_with_term[reset_env_ids] = terminal_amp_states
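# The discriminator turns (amp_obs, next_amp_obs) into a style reward and
# blends it with the environment's task reward according to task_reward_lerp;
# the two components are also returned separately for logging.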
rewards, amp_reward, task_reward = self.alg.discriminator.predict_amp_reward(amp_obs, next_amp_obs_with_term, rewards, normalizer=self.alg.amp_normalizer)
amp_obs = torch.clone(next_amp_obs)
self.alg.process_env_step(rewards, dones, infos, next_amp_obs_with_term)
if self.log_dir is not None:
# Book keeping
if "episode" in infos:
ep_infos.append(infos["episode"])
cur_reward_sum += rewards
cur_amp_reward_sum += amp_reward
cur_task_reward_sum += task_reward
cur_episode_length += 1
new_ids = (dones > 0).nonzero(as_tuple=False)
rewbuffer.extend(
cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist()
)
amp_rewbuffer.extend(
cur_amp_reward_sum[new_ids][:, 0].cpu().numpy().tolist()
)
task_rewbuffer.extend(
cur_task_reward_sum[new_ids][:, 0].cpu().numpy().tolist()
)
lenbuffer.extend(
cur_episode_length[new_ids][:, 0].cpu().numpy().tolist()
)
cur_amp_reward_sum[new_ids] = 0
cur_task_reward_sum[new_ids] = 0
cur_reward_sum[new_ids] = 0
cur_episode_length[new_ids] = 0
......@@ -162,7 +208,7 @@ class DHOnPolicyRunner:
start = stop
self.alg.compute_returns(critic_obs)
mean_value_loss, mean_surrogate_loss, mean_state_estimator_loss = self.alg.update()
mean_value_loss, mean_surrogate_loss, mean_state_estimator_loss, mean_amp_loss, mean_grad_pen_loss, mean_policy_pred, mean_expert_pred = self.alg.update()
stop = time.time()
learn_time = stop - start
if self.log_dir is not None:
......@@ -213,6 +259,8 @@ class DHOnPolicyRunner:
self.writer.add_scalar(
"Loss/state_estimator", locs["mean_state_estimator_loss"], locs["it"]
)
self.writer.add_scalar('Loss/AMP', locs['mean_amp_loss'], locs['it'])
self.writer.add_scalar('Loss/AMP_grad', locs['mean_grad_pen_loss'], locs['it'])
self.writer.add_scalar("Loss/learning_rate", self.alg.learning_rate, locs["it"])
self.writer.add_scalar("Policy/mean_noise_std", mean_std.item(), locs["it"])
self.writer.add_scalar("Perf/total_fps", fps, locs["it"])
......@@ -224,6 +272,8 @@ class DHOnPolicyRunner:
self.writer.add_scalar(
"Train/mean_reward", statistics.mean(locs["rewbuffer"]), locs["it"]
)
self.writer.add_scalar('Train/amp_reward', statistics.mean(locs['amp_rewbuffer']), locs['it'])
self.writer.add_scalar('Train/task_reward', statistics.mean(locs['task_rewbuffer']), locs['it'])
self.writer.add_scalar(
"Train/mean_episode_length",
statistics.mean(locs["lenbuffer"]),
......@@ -256,8 +306,15 @@ class DHOnPolicyRunner:
f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"""
f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"""
f"""{'State estimator loss:':>{pad}} {locs['mean_state_estimator_loss']:.4f}\n"""
f"""{'AMP loss:':>{pad}} {locs['mean_amp_loss']:.4f}\n"""
f"""{'AMP grad pen loss:':>{pad}} {locs['mean_grad_pen_loss']:.4f}\n"""
f"""{'AMP mean policy pred:':>{pad}} {locs['mean_policy_pred']:.4f}\n"""
f"""{'AMP mean expert pred:':>{pad}} {locs['mean_expert_pred']:.4f}\n"""
f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
f"""{'Mean amp reward:':>{pad}} {statistics.mean(locs['amp_rewbuffer']):.2f}\n"""
f"""{'Mean task reward:':>{pad}} {statistics.mean(locs['task_rewbuffer']):.2f}\n"""
f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n"""
)
# f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
......@@ -294,6 +351,8 @@ class DHOnPolicyRunner:
"es_optimizer_state_dict": self.alg.state_estimator_optimizer.state_dict(),
"iter": self.it,
"infos": infos,
'discriminator_state_dict': self.alg.discriminator.state_dict(),
'amp_normalizer': self.alg.amp_normalizer,
},
path,
)
......@@ -305,6 +364,8 @@ class DHOnPolicyRunner:
self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
self.alg.state_estimator_optimizer.load_state_dict(loaded_dict["es_optimizer_state_dict"])
self.current_learning_iteration = loaded_dict["iter"]
self.alg.discriminator.load_state_dict(loaded_dict['discriminator_state_dict'])
self.alg.amp_normalizer = loaded_dict['amp_normalizer']
return loaded_dict["infos"]
def get_inference_policy(self, device=None):
......
......@@ -36,11 +36,17 @@ import torch.optim as optim
from .actor_critic_dh import ActorCriticDH
from .rollout_storage import RolloutStorage
from .replay_buffer import ReplayBuffer
class DHPPO:
actor_critic: ActorCriticDH
def __init__(self,
actor_critic,
discriminator,
amp_data,
amp_normalizer,
min_std=None,
amp_replay_buffer_size=100000,
num_learning_epochs=1,
num_mini_batches=1,
clip_param=0.2,
......@@ -62,13 +68,28 @@ class DHPPO:
self.desired_kl = desired_kl
self.schedule = schedule
self.learning_rate = learning_rate
self.min_std = min_std
# Discriminator components
self.discriminator = discriminator
self.discriminator.to(self.device)
self.amp_transition = RolloutStorage.Transition()
self.amp_storage = ReplayBuffer(
discriminator.input_dim // 2, amp_replay_buffer_size, device)
self.amp_data = amp_data
self.amp_normalizer = amp_normalizer
# PPO components
self.actor_critic: ActorCriticDH = actor_critic
self.actor_critic.to(self.device)
self.storage = None # initialized later
self.optimizer = optim.Adam(self.actor_critic.parameters(),
lr=learning_rate)
params = [
{'params': self.actor_critic.parameters(), 'name': 'actor_critic'},
{'params': self.discriminator.trunk.parameters(),
'weight_decay': 10e-4, 'name': 'amp_trunk'},
{'params': self.discriminator.amp_linear.parameters(),
'weight_decay': 10e-2, 'name': 'amp_head'}]
self.optimizer = optim.Adam(params, lr=learning_rate)
self.state_estimator_optimizer = optim.Adam(self.actor_critic.state_estimator.parameters(),
lr=learning_rate)
self.transition = RolloutStorage.Transition()
......@@ -95,7 +116,7 @@ class DHPPO:
def train_mode(self):
self.actor_critic.train()
def act(self, obs, critic_obs):
def act(self, obs, critic_obs, amp_obs):
# Compute the actions and values
self.transition.actions = self.actor_critic.act(obs).detach()
self.transition.values = self.actor_critic.evaluate(critic_obs).detach()
......@@ -105,9 +126,11 @@ class DHPPO:
# need to record obs and critic_obs before env.step()
self.transition.observations = obs
self.transition.critic_observations = critic_obs
self.amp_transition.observations = amp_obs
return self.transition.actions
def process_env_step(self, rewards, dones, infos):
def process_env_step(self, rewards, dones, infos, amp_obs):
self.transition.rewards = rewards.clone()
self.transition.dones = dones
# Bootstrapping on time outs
......@@ -115,8 +138,10 @@ class DHPPO:
self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1)
# Record the transition
self.amp_storage.insert(self.amp_transition.observations, amp_obs)
self.storage.add_transitions(self.transition)
self.transition.clear()
self.amp_transition.clear()
self.actor_critic.reset(dones)
def compute_returns(self, last_critic_obs):
......@@ -128,9 +153,22 @@ class DHPPO:
mean_surrogate_loss = 0
mean_state_estimator_loss = 0
mean_amp_loss = 0
mean_grad_pen_loss = 0
mean_policy_pred = 0
mean_expert_pred = 0
generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch in generator:
amp_policy_generator = self.amp_storage.feed_forward_generator(
self.num_learning_epochs * self.num_mini_batches,
self.storage.num_envs * self.storage.num_transitions_per_env //
self.num_mini_batches)
amp_expert_generator = self.amp_data.feed_forward_generator(
self.num_learning_epochs * self.num_mini_batches,
self.storage.num_envs * self.storage.num_transitions_per_env //
self.num_mini_batches)
for (obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch), sample_amp_policy, sample_amp_expert in zip(generator, amp_policy_generator, amp_expert_generator):
self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0])
state_estimator_input = obs_batch[:,-self.num_short_obs:]
......@@ -174,10 +212,34 @@ class DHPPO:
else:
value_loss = (returns_batch - value_batch).pow(2).mean()
# Discriminator loss.
policy_state, policy_next_state = sample_amp_policy
expert_state, expert_next_state = sample_amp_expert
policy_state_unnorm = torch.clone(policy_state)
expert_state_unnorm = torch.clone(expert_state)
if self.amp_normalizer is not None:
with torch.no_grad():
policy_state = self.amp_normalizer.normalize_torch(policy_state, self.device)
policy_next_state = self.amp_normalizer.normalize_torch(policy_next_state, self.device)
expert_state = self.amp_normalizer.normalize_torch(expert_state, self.device)
expert_next_state = self.amp_normalizer.normalize_torch(expert_next_state, self.device)
policy_d = self.discriminator(torch.cat([policy_state, policy_next_state], dim=-1))
expert_d = self.discriminator(torch.cat([expert_state, expert_next_state], dim=-1))
expert_loss = torch.nn.MSELoss()(
expert_d, torch.ones(expert_d.size(), device=self.device))
policy_loss = torch.nn.MSELoss()(
policy_d, -1 * torch.ones(policy_d.size(), device=self.device))
amp_loss = 0.5 * (expert_loss + policy_loss)
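# Least-squares discriminator objective: expert transitions are regressed
# toward +1 and policy transitions toward -1; the gradient penalty below
# penalizes the discriminator's input gradient at the expert samples.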
grad_pen_loss = self.discriminator.compute_grad_pen(
expert_state, expert_next_state, lambda_=10)
# update all actor_critic.parameters()
loss = (surrogate_loss +
self.value_loss_coef * value_loss -
self.entropy_coef * entropy_batch.mean() +
self.entropy_coef * entropy_batch.mean() + amp_loss + grad_pen_loss +
torch.nn.MSELoss()(est_lin_vel, ref_lin_vel))
# Gradient step
......@@ -186,16 +248,31 @@ class DHPPO:
nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm)
self.optimizer.step()
self.actor_critic.std.data = self.actor_critic.std.data.clamp(min=self.min_std)  # yzh comment: may not be necessary
if self.amp_normalizer is not None:
self.amp_normalizer.update(policy_state_unnorm.cpu().numpy())
self.amp_normalizer.update(expert_state_unnorm.cpu().numpy())
state_estimator_loss = torch.nn.MSELoss()(est_lin_vel, ref_lin_vel)
mean_value_loss += value_loss.item()
mean_surrogate_loss += surrogate_loss.item()
mean_state_estimator_loss += state_estimator_loss.item()
mean_amp_loss += amp_loss.item()
mean_grad_pen_loss += grad_pen_loss.item()
mean_policy_pred += policy_d.mean().item()
mean_expert_pred += expert_d.mean().item()
num_updates = self.num_learning_epochs * self.num_mini_batches
mean_value_loss /= num_updates
mean_surrogate_loss /= num_updates
mean_state_estimator_loss /= num_updates
mean_amp_loss /= num_updates
mean_grad_pen_loss /= num_updates
mean_policy_pred /= num_updates
mean_expert_pred /= num_updates
self.storage.clear()
return mean_value_loss, mean_surrogate_loss, mean_state_estimator_loss
return mean_value_loss, mean_surrogate_loss, mean_state_estimator_loss, mean_amp_loss, mean_grad_pen_loss, mean_policy_pred, mean_expert_pred
import torch
import numpy as np
class ReplayBuffer:
"""Fixed-size buffer to store experience tuples."""
def __init__(self, obs_dim, buffer_size, device):
"""Initialize a ReplayBuffer object.
Arguments:
buffer_size (int): maximum size of buffer
"""
self.states = torch.zeros(buffer_size, obs_dim).to(device)
self.next_states = torch.zeros(buffer_size, obs_dim).to(device)
self.buffer_size = buffer_size
self.device = device
self.step = 0
self.num_samples = 0
def insert(self, states, next_states):
"""Add new states to memory."""
num_states = states.shape[0]
start_idx = self.step
end_idx = self.step + num_states
if end_idx > self.buffer_size:
self.states[self.step:self.buffer_size] = states[:self.buffer_size - self.step]
self.next_states[self.step:self.buffer_size] = next_states[:self.buffer_size - self.step]
self.states[:end_idx - self.buffer_size] = states[self.buffer_size - self.step:]
self.next_states[:end_idx - self.buffer_size] = next_states[self.buffer_size - self.step:]
else:
self.states[start_idx:end_idx] = states
self.next_states[start_idx:end_idx] = next_states
self.num_samples = min(self.buffer_size, max(end_idx, self.num_samples))
self.step = (self.step + num_states) % self.buffer_size
def feed_forward_generator(self, num_mini_batch, mini_batch_size):
for _ in range(num_mini_batch):
sample_idxs = np.random.choice(self.num_samples, size=mini_batch_size)
yield (self.states[sample_idxs].to(self.device),
self.next_states[sample_idxs].to(self.device))
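# Sketch of ReplayBuffer usage in the AMP pipeline: store (state, next_state)
# pairs produced by the policy, then sample mini-batches for the
# discriminator. Sizes are illustrative.
def _replay_buffer_example(device="cpu"):
    obs_dim, num_envs = 30, 16
    buffer = ReplayBuffer(obs_dim, buffer_size=1000, device=device)
    for _ in range(5):                              # a few fake rollout steps
        buffer.insert(torch.randn(num_envs, obs_dim, device=device),
                      torch.randn(num_envs, obs_dim, device=device))
    batches = list(buffer.feed_forward_generator(num_mini_batch=2,
                                                 mini_batch_size=32))
    return buffer.num_samples, batches              # 80 stored transitions, 2 batches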
......@@ -46,7 +46,9 @@ from humanoid.utils.math import quat_apply_yaw, wrap_to_pi, torch_rand_sqrt_floa
from humanoid.utils.helpers import class_to_dict
from .legged_robot_config import LeggedRobotCfg
from humanoid.utils.terrain import Terrain
from humanoid.algo.loadamp.motion_loader import AMPLoader
import humanoid.utils.kinematics.urdf as pk
import glob
def copysign_new(a, b):
a = torch.tensor(a, device=b.device, dtype=torch.float)
......@@ -102,11 +104,22 @@ class LeggedRobot(BaseTask):
self.init_done = False
self._parse_cfg(self.cfg)
super().__init__(self.cfg, sim_params, physics_engine, sim_device, headless)
self.chain_ee = []
TEMP_URDF = '/home/bxi/Documents/agibot_x1_train_bak/datasets/bot_elf_tempurdf/urdf/bot_elf.urdf'  # temporary hard-coded URDF path
for ee_name in self.cfg.env.ee_names:
with open(TEMP_URDF, "rb") as urdf_file:
# with open(self.cfg.asset.file, "rb") as urdf_file:
urdf_content = urdf_file.read()
chain_ee_instance = pk.build_serial_chain_from_urdf(urdf_content, ee_name).to(device=self.device)
self.chain_ee.append(chain_ee_instance)
if not self.headless:
self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat)
self._init_buffers()
self._prepare_reward_function()
self.init_done = True
# print("motion_files dir: ")
# print(self.cfg.env.amp_motion_files)
self.amp_loader = AMPLoader(motion_files=self.cfg.env.amp_motion_files, device=self.device, time_between_frames=self.dt)
def step(self, actions):
""" Apply actions, simulate, call self.post_physics_step()
......@@ -145,19 +158,19 @@ class LeggedRobot(BaseTask):
self.imu_lag_buffer[:,:,1:] = self.imu_lag_buffer[:,:,:self.cfg.domain_rand.imu_lag_timesteps_range[1]].clone()
self.imu_lag_buffer[:,:,0] = torch.cat((self.base_ang_vel, self.base_euler_xyz ), 1).clone()
self.post_physics_step()
reset_env_ids, terminal_amp_states = self.post_physics_step()
# return clipped obs, clipped states (None), rewards, dones and infos
clip_obs = self.cfg.normalization.clip_observations
self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs)
if self.privileged_obs_buf is not None:
self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs)
return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras
return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras, reset_env_ids, terminal_amp_states
def reset(self):
""" Reset all robots"""
self.reset_idx(torch.arange(self.num_envs, device=self.device))
obs, privileged_obs, _, _, _ = self.step(torch.zeros(
obs, privileged_obs, _, _, _, _, _ = self.step(torch.zeros(
self.num_envs, self.num_actions, device=self.device, requires_grad=False))
return obs, privileged_obs
......@@ -188,6 +201,7 @@ class LeggedRobot(BaseTask):
self.check_termination()
self.compute_reward()
env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
terminal_amp_states = self.get_amp_observations()[env_ids]
self.reset_idx(env_ids)
self.compute_observations() # in some cases a simulation step might be required to refresh some obs (for example body positions)
......@@ -203,6 +217,8 @@ class LeggedRobot(BaseTask):
if self.viewer and self.enable_viewer_sync and self.debug_viz:
self._draw_debug_vis()
return env_ids, terminal_amp_states
def check_termination(self):
""" Check if environments need to be reset
"""
......@@ -238,11 +254,14 @@ class LeggedRobot(BaseTask):
self.update_command_curriculum(env_ids)
# reset rand dof_pos and dof_vel=0
self._reset_dofs(env_ids)
# reset base position
if self.cfg.env.reference_state_initialization:
frames = self.amp_loader.get_full_frame_batch(len(env_ids))
self._reset_dofs_amp(env_ids, frames)
self._reset_root_states_amp(env_ids, frames)
else:
self._reset_dofs(env_ids)
self._reset_root_states(env_ids)
# sample cmd based on self.command_ranges
self._resample_commands(env_ids)
......@@ -757,6 +776,25 @@ class LeggedRobot(BaseTask):
self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _reset_dofs_amp(self, env_ids, frames):
""" Resets DOF position and velocities of selected environmments
Positions are randomly selected within 0.5:1.5 x default positions.
Velocities are set to zero.
Args:
env_ids (List[int]): Environemnt ids
frames: AMP frames to initialize motion with
"""
self.dof_pos[env_ids] = AMPLoader.get_joint_pose_batch(frames)
self.dof_vel[env_ids] = AMPLoader.get_joint_vel_batch(frames)
self.dof_pos[env_ids] = self.amp_loader.reorder_from_mujuco_to_isaacgym_tool(self.dof_pos[env_ids])
self.dof_vel[env_ids] = self.amp_loader.reorder_from_mujuco_to_isaacgym_tool(self.dof_vel[env_ids])
self.dof_pos[env_ids] = self.dof_pos[env_ids].clip(self.dof_pos_limits[:,0],self.dof_pos_limits[:,1])
self.dof_vel[env_ids] = self.dof_vel[env_ids].clip(-self.dof_vel_limits, self.dof_vel_limits)  # symmetric velocity limits
env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _reset_root_states(self, env_ids):
""" Resets ROOT states position and velocities of selected environmments
......@@ -787,7 +825,27 @@ class LeggedRobot(BaseTask):
self.gym.set_actor_root_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _reset_root_states_amp(self, env_ids, frames):
""" Resets ROOT states position and velocities of selected environmments
Sets base position based on the curriculum
Selects randomized base velocities within -0.5:0.5 [m/s, rad/s]
Args:
env_ids (List[int]): Environemnt ids
"""
# base position
root_pos = AMPLoader.get_root_pos_batch(frames)
root_pos[:, :2] = root_pos[:, :2] + self.env_origins[env_ids, :2]
self.root_states[env_ids, :3] = root_pos
# base velocities
root_orn = AMPLoader.get_root_rot_batch(frames)
self.root_states[env_ids, 3:7] = root_orn
self.root_states[env_ids, 7:10] = quat_rotate(root_orn, AMPLoader.get_linear_vel_batch(frames))
self.root_states[env_ids, 10:13] = quat_rotate(root_orn, AMPLoader.get_angular_vel_batch(frames))
env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_actor_root_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _push_robots(self):
""" Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
"""
......