Commit c3b1f4f9 by yzh

first_amp

parent 90fb0c43
......@@ -57,7 +57,7 @@ runs
# other
*.egg-info
__pycache__
*.pt
*.swp
MUJOCO_LOG.TXT
\ No newline at end of file
The motion capture data is available only under the terms of the Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license.
https://creativecommons.org/licenses/by-nc/4.0/legalcode
https://github.com/sebastianstarke/AI4Animation
original_name new_name trim_start trim_end
D1_001_KAN01_001 dog_sit_stand00
D1_003_KAN01_001 dog_sit00
D1_004_KAN01_001 dog_sit_stand01
D1_005_KAN01_001 dog_sit_stand02
D1_006_KAN01_001 dog_sit_stand03
D1_006_KAN01_002 dog_sit_stand04
D1_007_KAN01_001** dog_walk00 1.53 (90)
D1_008_KAN01_001** dog_walk01 4.87 (290)
D1_008_KAN01_002** dog_walk02 6.60 (394)
D1_009_KAN01_001** dog_walk03
D1_009_KAN01_002** dog_walk04 3.90 (232)
D1_010_KAN01_001** dog_run00 6.65 (397)
D1_010_KAN01_002** dog_run01
D1_010_KAN01_003** dog_run02
D1_010_KAN01_004* dog_run03
D1_013_KAN01_001 dog_sit01
D1_025_KAN01_001** dog_walk05 3.37 (200) 9.10 (545)
D1_031_KAN01_001** dog_run04 8.15 (487) 11.95 (716)
D1_045_KAN01_001** dog_walk06 2.87 (170) 6.57 (392)
D1_047_KAN01_001** dog_turn00 5.47 (326) 12.47 (747)
D1_047z_KAN01_002* dog_walk_run00
D1_047z_KAN01_003 dog_hop00
D1_047z_KAN01_004 dog_hop01
D1_047z_KAN01_005 dog_walk07
D1_049_KAN01_001 dog_walk08
D1_053_KAN01_001** dog_walk09
D1_053_KAN01_002* dog_walk10
D1_055_KAN01_001 dog_walk_run01
D1_057_KAN01_001* dog_jump00 15.68 (940) 19.12 (1146)
D1_058_KAN01_001* dog_jump01 25.68 (1539) 29.20 (1751)
D1_059_KAN01_001 dog_beg00
D1_061_KAN01_001 dog_walk_run02
D1_061z_KAN01_002 dog_walk11
D1_061z_KAN01_003 dog_walk12
D1_071_KAN02_002 dog_sit02
D1_073_KAN02_002* dog_walk13
D1_086_KAN02_001 dog_beg01
D1_ex01_KAN01_001* dog_walk14
D1_ex02_KAN02_001 dog_sit_walk00
D1_ex03_KAN02_001 dog_sit_walk01
D1_ex03_KAN02_003 dog_walk15
D1_ex03_KAN02_006 dog_jump_down0 33.90 (2033) 36.10 (2165)
D1_ex03_KAN02_012 dog_walk16
D1_ex03_KAN02_013 dog_shake0
D1_ex03_KAN02_014 dog_walk17
D1_ex04_KAN02_001* dog_stepup_jump0 5.65 (337) 12.58 (754)
D1_ex04_KAN02_002* dog_stepup_jump1 4.20 (250) 8.38 (502)
D1_ex04_KAN02_003* dog_stepup_jump2
D1_ex04_KAN02_004* dog_stepup_jump3 7.98 (477)
D1_ex05_KAN02_001 dog_sit_walk02
D1_ex06_KAN02_001 dog_walk18
\ No newline at end of file
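In the table above, trim_start and trim_end appear to give a time in seconds followed by the corresponding frame index in parentheses; the ratios are consistent with a capture rate of roughly 60 fps, though the exact rate is inferred from the numbers rather than stated in the notes. A minimal sketch of the conversion under that assumption:

# Hypothetical helper (not part of the dataset notes): convert a trim time in
# seconds to a frame index, assuming ~60 fps capture. The table's own values
# suggest a rate slightly below 60 fps, so small offsets are expected.
CAPTURE_FPS = 60.0  # assumed

def trim_time_to_frame(seconds, fps=CAPTURE_FPS):
    return round(seconds * fps)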
This source diff could not be displayed because it is too large. You can view the blob instead. (repeated for 30 large files in this commit)
import numpy as np
from legged_gym import LEGGED_GYM_ROOT_DIR
VISUALIZE_RETARGETING = True
URDF_FILENAME = "{LEGGED_GYM_ROOT_DIR}/resources/robots/a1/urdf/a1.urdf".format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR)
OUTPUT_DIR = "{LEGGED_GYM_ROOT_DIR}/../datasets/mocap_motions_a1".format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR)
REF_POS_SCALE = 0.825 # scaling factor; if the retargeted pose hits joint limits, try reducing this value
INIT_POS = np.array([0, 0, 0.32]) # a1
INIT_ROT = np.array([0, 0, 0, 1.0])
SIM_TOE_JOINT_IDS = [6, 11, 16, 21]
SIM_HIP_JOINT_IDS = [2, 7, 12, 17]
SIM_ROOT_OFFSET = np.array([0, 0, -0.04])
SIM_TOE_OFFSET_LOCAL = [
np.array([0.0, -0.06, 0.0]),
np.array([0.0, 0.06, 0.0]),
np.array([0.0, -0.06, 0.0]),
np.array([0.0, 0.06, 0.0])
]
TOE_HEIGHT_OFFSET = 0.02
DEFAULT_JOINT_POSE = np.array([0.1, 0.8, -1.5,
0.1, 1.0, -1.5,
-0.1, 0.8, -1.5,
-0.10, 1.0, -1.5])
JOINT_DAMPING = [0.1, 0.05, 0.01,
0.1, 0.05, 0.01,
0.1, 0.05, 0.01,
0.1, 0.05, 0.01]
FORWARD_DIR_OFFSET = np.array([0, 0, 0])
FR_FOOT_NAME = "FR_foot"
FL_FOOT_NAME = "FL_foot"
HR_FOOT_NAME = "RR_foot"
HL_FOOT_NAME = "RL_foot"
MOCAP_MOTIONS = [
# Output motion name, input file, frame start, frame end, motion weight.
["pace0", "datasets/keypoint_datasets/ai4animation/dog_walk00_joint_pos.txt", 162, 201, 1],
["pace1", "datasets/keypoint_datasets/ai4animation/dog_walk00_joint_pos.txt", 201, 400, 1],
["pace2", "datasets/keypoint_datasets/ai4animation/dog_walk00_joint_pos.txt", 400, 600, 1],
["trot0", "datasets/keypoint_datasets/ai4animation/dog_walk03_joint_pos.txt", 448, 481, 1],
["trot1", "datasets/keypoint_datasets/ai4animation/dog_walk03_joint_pos.txt", 400, 600, 1],
["trot2", "datasets/keypoint_datasets/ai4animation/dog_run04_joint_pos.txt", 480, 663, 1],
["canter0", "datasets/keypoint_datasets/ai4animation/dog_run00_joint_pos.txt", 430, 480, 1],
["canter1", "datasets/keypoint_datasets/ai4animation/dog_run00_joint_pos.txt", 380, 430, 1],
["canter2", "datasets/keypoint_datasets/ai4animation/dog_run00_joint_pos.txt", 480, 566, 1],
["right_turn0", "datasets/keypoint_datasets/ai4animation/dog_walk09_joint_pos.txt", 1085, 1124, 1.5],
["right_turn1", "datasets/keypoint_datasets/ai4animation/dog_walk09_joint_pos.txt", 560, 670, 1.5],
["left_turn0", "datasets/keypoint_datasets/ai4animation/dog_walk09_joint_pos.txt", 2404, 2450, 1.5],
["left_turn1", "datasets/keypoint_datasets/ai4animation/dog_walk09_joint_pos.txt", 120, 220, 1.5]
]
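# Sketch (not the retargeting script itself, which is among the large diffs
# omitted above) of how the MOCAP_MOTIONS entries might be consumed: each
# entry names an output clip, a keypoint file, a frame range and a motion
# weight. np.loadtxt and its default whitespace delimiter are assumptions
# about the *_joint_pos.txt format.
def iter_mocap_clips():
    for name, path, frame_start, frame_end, weight in MOCAP_MOTIONS:
        keypoints = np.loadtxt(path)          # assumed plain-text keypoint rows
        clip = keypoints[frame_start:frame_end]
        yield name, clip, weight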
import numpy as np
from pybullet_utils import transformations
import pybullet
from rsl_rl.datasets import pose3d
POS_SIZE = 3
ROT_SIZE = 4
def get_root_pos(pose):
return pose[0:POS_SIZE]
def get_root_rot(pose):
return pose[POS_SIZE : (POS_SIZE + ROT_SIZE)]
def calc_heading(q):
"""Returns the heading of a rotation q, specified as a quaternion.
The heading represents the rotational component of q along the vertical
axis (z axis).
Args:
q: A quaternion that the heading is to be computed from.
Returns:
An angle representing the rotation about the z axis.
"""
ref_dir = np.array([1, 0, 0])
rot_dir = pose3d.QuaternionRotatePoint(ref_dir, q)
heading = np.arctan2(rot_dir[1], rot_dir[0])
return heading
def calc_heading_rot(q):
"""Return a quaternion representing the heading rotation of q along the vertical axis (z axis).
Args:
q: A quaternion that the heading is to be computed from.
Returns:
A quaternion representing the rotation about the z axis.
"""
heading = calc_heading(q)
q_heading = transformations.quaternion_about_axis(heading, [0, 0, 1])
return q_heading
def get_joint_limits(robot):
num_joints = pybullet.getNumJoints(robot)
joint_limit_low = []
joint_limit_high = []
for i in range(num_joints):
joint_info = pybullet.getJointInfo(robot, i)
joint_type = joint_info[2]
if (
joint_type == pybullet.JOINT_PRISMATIC
or joint_type == pybullet.JOINT_REVOLUTE
):
joint_limit_low.append(joint_info[8])
joint_limit_high.append(joint_info[9])
return joint_limit_low, joint_limit_high
def set_pose(robot, pose):
num_joints = pybullet.getNumJoints(robot)
root_pos = get_root_pos(pose)
root_rot = get_root_rot(pose)
pybullet.resetBasePositionAndOrientation(robot, root_pos, root_rot)
for j in range(num_joints):
j_info = pybullet.getJointInfo(robot, j)
j_state = pybullet.getJointStateMultiDof(robot, j)
j_pose_idx = j_info[3]
j_pose_size = len(j_state[0])
j_vel_size = len(j_state[1])
if j_pose_size > 0:
j_pose = pose[j_pose_idx : (j_pose_idx + j_pose_size)]
j_vel = np.zeros(j_vel_size)
pybullet.resetJointStateMultiDof(robot, j, j_pose, j_vel)
def output_motion(frames, out_filename, motion_weight, frame_duration):
with open(out_filename, "w") as f:
f.write("{\n")
f.write('"LoopMode": "Wrap",\n')
f.write('"FrameDuration": ' + str(frame_duration) + ",\n")
f.write('"EnableCycleOffsetPosition": true,\n')
f.write('"EnableCycleOffsetRotation": true,\n')
f.write('"MotionWeight": ' + str(motion_weight) + ",\n")
f.write("\n")
f.write('"Frames":\n')
f.write("[")
for i in range(frames.shape[0]):
curr_frame = frames[i]
if i != 0:
f.write(",")
f.write("\n [")
for j in range(frames.shape[1]):
curr_val = curr_frame[j]
if j != 0:
f.write(", ")
f.write("%.5f" % curr_val)
f.write("]")
f.write("\n]")
f.write("\n}")
return
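# Illustrative usage of output_motion (file name, frame values and timing are
# made up for the example, not taken from this commit):
def example_output_motion():
    """Write a tiny 2-frame, 3-value clip at 50 Hz to example_motion.txt."""
    dummy_frames = np.array([[0.0, 0.1, 0.2],
                             [0.1, 0.2, 0.3]])
    output_motion(dummy_frames, "example_motion.txt",
                  motion_weight=1.0, frame_duration=0.02)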
###########################
## Visualization methods ##
###########################
def set_linear_vel_pos(linear_vel, robot_idx, unique_id=None):
ray_start = np.array([0.0, 0.0, 0.25])
ray_end = ray_start + linear_vel / 3.0
if unique_id is not None:
return pybullet.addUserDebugLine(
lineFromXYZ=ray_start,
lineToXYZ=ray_end,
lineWidth=4,
replaceItemUniqueId=unique_id,
lineColorRGB=[1, 0, 0],
parentObjectUniqueId=robot_idx,
)
else:
return pybullet.addUserDebugLine(
lineFromXYZ=ray_start,
lineToXYZ=ray_end,
lineWidth=4,
lineColorRGB=[1, 0, 0],
parentObjectUniqueId=robot_idx,
)
def set_angular_vel_pos(angular_vel, robot_idx, unique_id=None):
ray_start = np.array([0.0, 0.0, 0.0])
ray_end = ray_start + angular_vel
if unique_id is not None:
return pybullet.addUserDebugLine(
lineFromXYZ=ray_start,
lineToXYZ=ray_end,
lineWidth=4,
replaceItemUniqueId=unique_id,
lineColorRGB=[1, 0, 0],
parentObjectUniqueId=robot_idx,
)
else:
return pybullet.addUserDebugLine(
lineFromXYZ=ray_start,
lineToXYZ=ray_end,
lineWidth=4,
lineColorRGB=[1, 0, 0],
parentObjectUniqueId=robot_idx,
)
def build_markers(num_markers, special_idx, special_colors):
marker_radius = 0.01
markers = []
for i in range(num_markers):
if i in special_idx:
color = special_colors[special_idx.index(i)]
else:
color = [1, 1, 1, 1]
virtual_shape_id = pybullet.createVisualShape(
shapeType=pybullet.GEOM_SPHERE, radius=marker_radius, rgbaColor=color
)
body_id = pybullet.createMultiBody(
baseMass=0,
baseCollisionShapeIndex=-1,
baseVisualShapeIndex=virtual_shape_id,
basePosition=[0, 0, 0],
useMaximalCoordinates=True,
)
markers.append(body_id)
return markers
def update_marker_pos(marker_pos, marker_ids):
num_markers = len(marker_ids)
assert num_markers == marker_pos.shape[0]
for i in range(num_markers):
curr_id = marker_ids[i]
curr_pos = marker_pos[i]
pybullet.resetBasePositionAndOrientation(curr_id, curr_pos, [0, 0, 0, 1])
return marker_ids
def build_coordinate_frame():
dir_line_1 = pybullet.addUserDebugLine([0, 0, 0], [1, 1, 1], [1, 0, 0])
dir_line_2 = pybullet.addUserDebugLine([0, 0, 0], [1, 1, 1], [1, 0, 0])
dir_line_3 = pybullet.addUserDebugLine([0, 0, 0], [1, 1, 1], [1, 0, 0])
return [dir_line_1, dir_line_2, dir_line_3]
def update_coordinate_frame(lines, origin, H, R, scale=0.25):
for i in range(3):
color = [0, 0, 0]
color[i] = 1
lines[i] = pybullet.addUserDebugLine(
lineFromXYZ=origin,
lineToXYZ=H + R.dot(np.array(color)) * scale,
replaceItemUniqueId=lines[i],
lineColorRGB=color,
)
return lines
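# Sketch of how the marker helpers above could drive a simple keypoint
# playback. A connected pybullet client and a (T, num_markers, 3) keypoint
# array are assumed; the number of markers and the colors are illustrative.
def visualize_keypoint_sequence(motion_keypoints, num_markers=17):
    markers = build_markers(num_markers, special_idx=[0],
                            special_colors=[[1, 0, 0, 1]])
    for frame_keypoints in motion_keypoints:   # shape (num_markers, 3)
        update_marker_pos(frame_keypoints, markers)
        pybullet.stepSimulation()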
import numpy as np
import torch
from typing import Tuple
_EPS = np.finfo(float).eps * 4.0
class RunningMeanStd:
def __init__(self, epsilon: float = 1e-4, shape: Tuple[int, ...] = ()):
"""
Calculates the running mean and std of a data stream
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
:param epsilon: helps with arithmetic issues
:param shape: the shape of the data stream's output
"""
self.mean = np.zeros(shape, np.float64)
self.var = np.ones(shape, np.float64)
self.count = epsilon
def update(self, arr: np.ndarray) -> None:
batch_mean = np.mean(arr, axis=0)
batch_var = np.var(arr, axis=0)
batch_count = arr.shape[0]
self.update_from_moments(batch_mean, batch_var, batch_count)
def update_from_moments(self, batch_mean: np.ndarray, batch_var: np.ndarray, batch_count: int) -> None:
delta = batch_mean - self.mean
tot_count = self.count + batch_count
new_mean = self.mean + delta * batch_count / tot_count
m_a = self.var * self.count
m_b = batch_var * batch_count
m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count)
new_var = m_2 / (self.count + batch_count)
new_count = batch_count + self.count
self.mean = new_mean
self.var = new_var
self.count = new_count
class Normalizer(RunningMeanStd):
def __init__(self, input_dim, epsilon=1e-4, clip_obs=10.0):
super().__init__(shape=input_dim)
self.epsilon = epsilon
self.clip_obs = clip_obs
def normalize(self, input):
return np.clip((input - self.mean) / np.sqrt(self.var + self.epsilon), -self.clip_obs, self.clip_obs)
def normalize_torch(self, input, device):
mean_torch = torch.tensor(self.mean, device=device, dtype=torch.float32)
std_torch = torch.sqrt(torch.tensor(self.var + self.epsilon, device=device, dtype=torch.float32))
return torch.clamp((input - mean_torch) / std_torch, -self.clip_obs, self.clip_obs)
def update_normalizer(self, rollouts, expert_loader):
policy_data_generator = rollouts.feed_forward_generator_amp(None, mini_batch_size=expert_loader.batch_size)
expert_data_generator = expert_loader.dataset.feed_forward_generator_amp(expert_loader.batch_size)
for expert_batch, policy_batch in zip(expert_data_generator, policy_data_generator):
self.update(torch.vstack(tuple(policy_batch) + tuple(expert_batch)).cpu().numpy())
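# Illustrative standalone use of Normalizer outside the AMP loop: accumulate
# running statistics from a few batches, then normalize fresh data (clipped to
# +/- clip_obs). Shapes and random data are placeholders.
def _normalizer_example():
    norm = Normalizer(input_dim=(4,))
    for _ in range(2):
        norm.update(np.random.randn(128, 4))
    return norm.normalize(np.random.randn(32, 4))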
def quaternion_slerp(q0, q1, fraction, spin=0, shortestpath=True):
"""Batch quaternion spherical linear interpolation."""
out = torch.zeros_like(q0)
zero_mask = torch.isclose(fraction, torch.zeros_like(fraction)).squeeze()
ones_mask = torch.isclose(fraction, torch.ones_like(fraction)).squeeze()
out[zero_mask] = q0[zero_mask]
out[ones_mask] = q1[ones_mask]
d = torch.sum(q0 * q1, dim=-1, keepdim=True)
dist_mask = (torch.abs(torch.abs(d) - 1.0) < _EPS).squeeze()
out[dist_mask] = q0[dist_mask]
if shortestpath:
d_old = torch.clone(d)
d = torch.where(d_old < 0, -d, d)
q1 = torch.where(d_old < 0, -q1, q1)
angle = torch.acos(d) + spin * torch.pi
angle_mask = (torch.abs(angle) < _EPS).squeeze()
out[angle_mask] = q0[angle_mask]
final_mask = torch.logical_or(zero_mask, ones_mask)
final_mask = torch.logical_or(final_mask, dist_mask)
final_mask = torch.logical_or(final_mask, angle_mask)
final_mask = torch.logical_not(final_mask)
isin = 1.0 / torch.sin(angle)
q0 *= torch.sin((1.0 - fraction) * angle) * isin
q1 *= torch.sin(fraction * angle) * isin
q0 += q1
out[final_mask] = q0[final_mask]
return out
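# Quick sanity check for quaternion_slerp (illustrative): interpolating
# halfway between the identity and a 90-degree rotation about z should give
# ~45 degrees about z, i.e. approximately [0, 0, 0.3827, 0.9239] in
# (x, y, z, w) order.
def _quaternion_slerp_example():
    q0 = torch.tensor([[0.0, 0.0, 0.0, 1.0],
                       [0.0, 0.0, 0.0, 1.0]])
    q1 = torch.tensor([[0.0, 0.0, 0.70710678, 0.70710678],
                       [0.0, 0.0, 0.70710678, 0.70710678]])
    fraction = torch.tensor([[0.5], [0.5]])
    return quaternion_slerp(q0, q1, fraction)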
def split_and_pad_trajectories(tensor, dones):
""" Splits trajectories at done indices. Then concatenates them and padds with zeros up to the length og the longest trajectory.
Returns masks corresponding to valid parts of the trajectories
Example:
Input: [ [a1, a2, a3, a4 | a5, a6],
[b1, b2 | b3, b4, b5 | b6]
]
Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
[a5, a6, 0, 0], | [True, True, False, False],
[b1, b2, 0, 0], | [True, True, False, False],
[b3, b4, b5, 0], | [True, True, True, False],
[b6, 0, 0, 0] | [True, False, False, False],
] | ]
Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
"""
dones = dones.clone()
dones[-1] = 1
# Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
flat_dones = dones.transpose(1, 0).reshape(-1, 1)
# Get length of trajectory by counting the number of successive not done elements
done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
trajectory_lengths = done_indices[1:] - done_indices[:-1]
trajectory_lengths_list = trajectory_lengths.tolist()
# Extract the individual trajectories
trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list)
padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
return padded_trajectories, trajectory_masks
def unpad_trajectories(trajectories, masks):
""" Does the inverse operation of split_and_pad_trajectories()
"""
# Need to transpose before and after the masking to have proper reshaping
return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0)
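# Illustrative use of split_and_pad_trajectories with the done pattern from
# the docstring above: a (time=6, envs=2, dim=1) buffer yields 5 trajectories
# padded to length 4 plus the matching validity masks.
def _split_and_pad_example():
    tensor = torch.arange(12, dtype=torch.float32).reshape(6, 2, 1)
    dones = torch.zeros(6, 2)
    dones[3, 0] = 1   # env 0 ends a trajectory at its 4th step
    dones[1, 1] = 1   # env 1 ends at its 2nd step
    dones[4, 1] = 1   # ... and again at its 5th step
    return split_and_pad_trajectories(tensor, dones)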
from typing import Tuple
import numpy as np
import torch
_EPS = np.finfo(float).eps * 4.0
class RunningMeanStd(object):
def __init__(self, epsilon: float = 1e-4, shape: Tuple[int, ...] = ()):
"""
Calculates the running mean and std of a data stream
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
:param epsilon: helps with arithmetic issues
:param shape: the shape of the data stream's output
"""
self.mean = np.zeros(shape, np.float64)
self.var = np.ones(shape, np.float64)
self.count = epsilon
def update(self, arr: np.ndarray) -> None:
batch_mean = np.mean(arr, axis=0)
batch_var = np.var(arr, axis=0)
batch_count = arr.shape[0]
self.update_from_moments(batch_mean, batch_var, batch_count)
def update_from_moments(self, batch_mean: np.ndarray, batch_var: np.ndarray, batch_count: int) -> None:
delta = batch_mean - self.mean
tot_count = self.count + batch_count
new_mean = self.mean + delta * batch_count / tot_count
m_a = self.var * self.count
m_b = batch_var * batch_count
m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count)
new_var = m_2 / (self.count + batch_count)
new_count = batch_count + self.count
self.mean = new_mean
self.var = new_var
self.count = new_count
class Normalizer(RunningMeanStd):
def __init__(self, input_dim, epsilon=1e-4, clip_obs=10.0):
super().__init__(shape=input_dim)
self.epsilon = epsilon
self.clip_obs = clip_obs
def normalize(self, input):
return np.clip(
(input - self.mean) / np.sqrt(self.var + self.epsilon),
-self.clip_obs, self.clip_obs)
def normalize_torch(self, input, device):
mean_torch = torch.tensor(
self.mean, device=device, dtype=torch.float32)
std_torch = torch.sqrt(torch.tensor(
self.var + self.epsilon, device=device, dtype=torch.float32))
return torch.clamp(
(input - mean_torch) / std_torch, -self.clip_obs, self.clip_obs)
def update_normalizer(self, rollouts, expert_loader):
policy_data_generator = rollouts.feed_forward_generator_amp(
None, mini_batch_size=expert_loader.batch_size)
expert_data_generator = expert_loader.dataset.feed_forward_generator_amp(
expert_loader.batch_size)
for expert_batch, policy_batch in zip(expert_data_generator, policy_data_generator):
self.update(
torch.vstack(tuple(policy_batch) + tuple(expert_batch)).cpu().numpy())
def quaternion_slerp(q0, q1, fraction, spin=0, shortestpath=True):
"""Batch quaternion spherical linear interpolation."""
out = torch.zeros_like(q0)
zero_mask = torch.isclose(fraction, torch.zeros_like(fraction)).squeeze()
ones_mask = torch.isclose(fraction, torch.ones_like(fraction)).squeeze()
out[zero_mask] = q0[zero_mask]
out[ones_mask] = q1[ones_mask]
d = torch.sum(q0 * q1, dim=-1, keepdim=True)
dist_mask = (torch.abs(torch.abs(d) - 1.0) < _EPS).squeeze()
out[dist_mask] = q0[dist_mask]
if shortestpath:
d_old = torch.clone(d)
d = torch.where(d_old < 0, -d, d)
q1 = torch.where(d_old < 0, -q1, q1)
angle = torch.acos(d) + spin * torch.pi
angle_mask = (torch.abs(angle) < _EPS).squeeze()
out[angle_mask] = q0[angle_mask]
final_mask = torch.logical_or(zero_mask, ones_mask)
final_mask = torch.logical_or(final_mask, dist_mask)
final_mask = torch.logical_or(final_mask, angle_mask)
final_mask = torch.logical_not(final_mask)
isin = 1.0 / torch.sin(angle)
q0 *= torch.sin((1.0 - fraction) * angle) * isin
q1 *= torch.sin(fraction * angle) * isin
q0 += q1
out[final_mask] = q0[final_mask]
return out
# coding=utf-8
# Copyright 2020 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for processing motion clips."""
import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)
import numpy as np
from . import pose3d
from .pose3d import *
from pybullet_utils import transformations
def standardize_quaternion(q):
"""Returns a quaternion where q.w >= 0 to remove redundancy due to q = -q.
Args:
q: A quaternion to be standardized.
Returns:
A quaternion with q.w >= 0.
"""
if q[-1] < 0:
q = -q
return q
def normalize_rotation_angle(theta):
"""Returns a rotation angle normalized between [-pi, pi].
Args:
theta: angle of rotation (radians).
Returns:
An angle of rotation normalized between [-pi, pi].
"""
norm_theta = theta
if np.abs(norm_theta) > np.pi:
norm_theta = np.fmod(norm_theta, 2 * np.pi)
if norm_theta >= 0:
norm_theta += -2 * np.pi
else:
norm_theta += 2 * np.pi
return norm_theta
def calc_heading(q):
"""Returns the heading of a rotation q, specified as a quaternion.
The heading represents the rotational component of q along the vertical
axis (z axis).
Args:
q: A quaternion that the heading is to be computed from.
Returns:
An angle representing the rotation about the z axis.
"""
ref_dir = np.array([1, 0, 0])
rot_dir = pose3d.QuaternionRotatePoint(ref_dir, q)
heading = np.arctan2(rot_dir[1], rot_dir[0])
return heading
def calc_heading_rot(q):
"""Return a quaternion representing the heading rotation of q along the vertical axis (z axis).
Args:
q: A quaternion that the heading is to be computed from.
Returns:
A quaternion representing the rotation about the z axis.
"""
heading = calc_heading(q)
q_heading = transformations.quaternion_about_axis(heading, [0, 0, 1])
return q_heading
# coding=utf-8
# Copyright 2020 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for 3D pose conversion."""
import math
import numpy as np
from pybullet_utils import transformations
VECTOR3_0 = np.zeros(3, dtype=np.float64)
VECTOR3_1 = np.ones(3, dtype=np.float64)
VECTOR3_X = np.array([1, 0, 0], dtype=np.float64)
VECTOR3_Y = np.array([0, 1, 0], dtype=np.float64)
VECTOR3_Z = np.array([0, 0, 1], dtype=np.float64)
# QUATERNION_IDENTITY is the multiplicative identity 1.0 + 0i + 0j + 0k.
# When interpreted as a rotation, it is the identity rotation.
QUATERNION_IDENTITY = np.array([0.0, 0.0, 0.0, 1.0], dtype=np.float64)
def Vector3RandomNormal(sigma, mu=VECTOR3_0):
"""Returns a random 3D vector from a normal distribution.
Each component is selected independently from a normal distribution.
Args:
sigma: Scale (or stddev) of distribution for all variables.
mu: Mean of distribution for each variable.
Returns:
A 3D vector in a numpy array.
"""
random_v3 = np.random.normal(scale=sigma, size=3) + mu
return random_v3
def Vector3RandomUniform(low=VECTOR3_0, high=VECTOR3_1):
"""Returns a 3D vector selected uniformly from the input box.
Args:
low: The min-value corner of the box.
high: The max-value corner of the box.
Returns:
A 3D vector in a numpy array.
"""
random_x = np.random.uniform(low=low[0], high=high[0])
random_y = np.random.uniform(low=low[1], high=high[1])
random_z = np.random.uniform(low=low[2], high=high[2])
return np.array([random_x, random_y, random_z])
def Vector3RandomUnit():
"""Returns a random 3D vector with unit length.
Generates a 3D vector selected uniformly from the unit sphere.
Returns:
A normalized 3D vector in a numpy array.
"""
longitude = np.random.uniform(low=-math.pi, high=math.pi)
sin_latitude = np.random.uniform(low=-1.0, high=1.0)
cos_latitude = math.sqrt(1.0 - sin_latitude * sin_latitude)
x = math.cos(longitude) * cos_latitude
y = math.sin(longitude) * cos_latitude
z = sin_latitude
return np.array([x, y, z], dtype=np.float64)
def QuaternionNormalize(q):
"""Normalizes the quaternion to length 1.
Divides the quaternion by its magnitude. If the magnitude is too
small, returns the quaternion identity value (1.0).
Args:
q: A quaternion to be normalized.
Raises:
ValueError: If input quaternion has length near zero.
Returns:
A quaternion with magnitude 1 in a numpy array [x, y, z, w].
"""
q_norm = np.linalg.norm(q)
if np.isclose(q_norm, 0.0):
raise ValueError(
'Quaternion may not be zero in QuaternionNormalize: |q| = %f, q = %s' %
(q_norm, q))
return q / q_norm
def QuaternionFromAxisAngle(axis, angle):
"""Returns a quaternion that generates the given axis-angle rotation.
Returns the quaternion: sin(angle/2) * axis + cos(angle/2).
Args:
axis: Axis of rotation, a 3D vector in a numpy array.
angle: The angle of rotation (radians).
Raises:
ValueError: If input axis is not a normalizable 3D vector.
Returns:
A unit quaternion in a numpy array.
"""
if len(axis) != 3:
raise ValueError('Axis vector should have three components: %s' % axis)
axis_norm = np.linalg.norm(axis)
if np.isclose(axis_norm, 0.0):
raise ValueError('Axis vector may not have zero length: |v| = %f, v = %s' %
(axis_norm, axis))
half_angle = angle * 0.5
q = np.zeros(4, dtype=np.float64)
q[0:3] = axis
q[0:3] *= math.sin(half_angle) / axis_norm
q[3] = math.cos(half_angle)
return q
def QuaternionToAxisAngle(quat, default_axis=VECTOR3_Z, direction_axis=None):
"""Calculates axis and angle of rotation performed by a quaternion.
Calculates the axis and angle of the rotation performed by the quaternion.
The quaternion should have four values and be normalized.
Args:
quat: Unit quaternion in a numpy array.
default_axis: 3D vector axis used if the rotation is near to zero. Without
this default, small rotations would result in an exception. It is
reasonable to use a default axis for tiny rotations, because zero angle
rotations about any axis are equivalent.
direction_axis: Used to disambiguate rotation directions. If the
direction_axis is specified, the axis of the rotation will be chosen such
that its inner product with the direction_axis is non-negative.
Raises:
ValueError: If quat is not a normalized quaternion.
Returns:
axis: Axis of rotation.
angle: Angle in radians.
"""
if len(quat) != 4:
raise ValueError(
'Quaternion should have four components [x, y, z, w]: %s' % quat)
if not np.isclose(1.0, np.linalg.norm(quat)):
raise ValueError('Quaternion should have unit length: |q| = %f, q = %s' %
(np.linalg.norm(quat), quat))
axis = quat[:3].copy()
axis_norm = np.linalg.norm(axis)
min_axis_norm = 1e-8
if axis_norm < min_axis_norm:
axis = default_axis
if len(default_axis) != 3:
raise ValueError('Axis vector should have three components: %s' % axis)
if not np.isclose(np.linalg.norm(axis), 1.0):
raise ValueError('Axis vector should have unit length: |v| = %f, v = %s' %
(np.linalg.norm(axis), axis))
else:
axis /= axis_norm
sin_half_angle = axis_norm
if direction_axis is not None and np.inner(axis, direction_axis) < 0:
sin_half_angle = -sin_half_angle
axis = -axis
cos_half_angle = quat[3]
half_angle = math.atan2(sin_half_angle, cos_half_angle)
angle = half_angle * 2
return axis, angle
def QuaternionRandomRotation(max_angle=math.pi):
"""Creates a random small rotation around a random axis.
Generates a small rotation with the axis vector selected uniformly
from the unit sphere and an angle selected from a uniform
distribution over [0, max_angle].
If the max_angle is not specified, the rotation should be selected
uniformly over all possible rotation angles.
Args:
max_angle: The maximum angle of rotation (radians).
Returns:
A unit quaternion in a numpy array.
"""
angle = np.random.uniform(low=0, high=max_angle)
axis = Vector3RandomUnit()
return QuaternionFromAxisAngle(axis, angle)
def QuaternionRotatePoint(point, quat):
"""Performs a rotation by quaternion.
Rotate the point by the quaternion using quaternion multiplication,
(q * p * q^-1), without constructing the rotation matrix.
Args:
point: The point to be rotated.
quat: The rotation represented as a quaternion [x, y, z, w].
Returns:
A 3D vector in a numpy array.
"""
q_point = np.array([point[0], point[1], point[2], 0.0])
quat_inverse = transformations.quaternion_inverse(quat)
q_point_rotated = transformations.quaternion_multiply(
transformations.quaternion_multiply(quat, q_point), quat_inverse)
return q_point_rotated[:3]
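# Small self-check (illustrative): rotating the x axis by 90 degrees about z
# with the helpers above should give approximately the y axis.
def _quaternion_rotate_point_example():
    q = QuaternionFromAxisAngle(VECTOR3_Z, math.pi / 2)
    return QuaternionRotatePoint(VECTOR3_X, q)   # ~[0, 1, 0]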
def IsRotationMatrix(m):
"""Returns true if the 3x3 submatrix represents a rotation.
Args:
m: A transformation matrix.
Raises:
ValueError: If input is not a matrix of size at least 3x3.
Returns:
True if the 3x3 submatrix is a rotation (orthogonal).
"""
if len(m.shape) != 2 or m.shape[0] < 3 or m.shape[1] < 3:
raise ValueError('Matrix should be 3x3 or 4x4: %s\n %s' % (m.shape, m))
rot = m[:3, :3]
eye = np.matmul(rot, np.transpose(rot))
return np.isclose(eye, np.identity(3), atol=1e-4).all()
# def ZAxisAlignedRobotPoseTool(robot_pose_tool):
# """Returns the current gripper pose rotated for alignment with the z-axis.
# Args:
# robot_pose_tool: a pose3d.Pose3d() instance.
# Returns:
# An instance of pose.Transform representing the current gripper pose
# rotated for alignment with the z-axis.
# """
# # Align the current pose to the z-axis.
# robot_pose_tool.quaternion = transformations.quaternion_multiply(
# RotationBetween(
# robot_pose_tool.matrix4x4[0:3, 0:3].dot(np.array([0, 0, 1])),
# np.array([0.0, 0.0, -1.0])), robot_pose_tool.quaternion)
# return robot_pose_tool
# def RotationBetween(a_translation_b, a_translation_c):
# """Computes the rotation from one vector to another.
# The computed rotation has the property that:
# a_translation_c = a_rotation_b_to_c * a_translation_b
# Args:
# a_translation_b: vec3, vector to rotate from
# a_translation_c: vec3, vector to rotate to
# Returns:
# a_rotation_b_to_c: new Orientation
# """
# rotation = rotation3.Rotation3.rotation_between(
# a_translation_b, a_translation_c, err_msg='RotationBetween')
# return rotation.quaternion.xyzw
import torch
import torch.nn as nn
from torch import autograd
class AMPDiscriminator(nn.Module):
def __init__(
self, input_dim, amp_reward_coef, hidden_layer_sizes, device, task_reward_lerp=0.0):
super(AMPDiscriminator, self).__init__()
self.device = device
self.input_dim = input_dim
self.amp_reward_coef = amp_reward_coef
amp_layers = []
curr_in_dim = input_dim
for hidden_dim in hidden_layer_sizes:
amp_layers.append(nn.Linear(curr_in_dim, hidden_dim))
amp_layers.append(nn.LeakyReLU())
curr_in_dim = hidden_dim
self.trunk = nn.Sequential(*amp_layers).to(device)
self.amp_linear = nn.Linear(hidden_layer_sizes[-1], 1).to(device)
self.trunk.train()
self.amp_linear.train()
self.task_reward_lerp = task_reward_lerp
def forward(self, x):
h = self.trunk(x)
d = self.amp_linear(h)
return d
def compute_grad_pen(self,
expert_state,
expert_next_state,
lambda_=10):
expert_data = torch.cat([expert_state, expert_next_state], dim=-1)
expert_data.requires_grad = True
disc = self.amp_linear(self.trunk(expert_data))
ones = torch.ones(disc.size(), device=disc.device)
grad = autograd.grad(
outputs=disc, inputs=expert_data,
grad_outputs=ones, create_graph=True,
retain_graph=True, only_inputs=True)[0]
# Enforce that the grad norm approaches 0.
grad_pen = lambda_ * (grad.norm(2, dim=1) - 0).pow(2).mean()
return grad_pen
def predict_amp_reward(
self, state, next_state, task_reward, normalizer=None):
with torch.no_grad():
self.eval()
if normalizer is not None:
state = normalizer.normalize_torch(state, self.device)
next_state = normalizer.normalize_torch(next_state, self.device)
d = self.amp_linear(self.trunk(torch.cat([state, next_state], dim=-1)))
# reward = self.amp_reward_coef * torch.clamp(1 - (1/4) * torch.square(torch.zeros_like(d)), min=0)
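# Least-squares-GAN style reward: the discriminator output d is trained toward
# +1 on reference transitions, so 1 - 0.25 * (d - 1)^2 is close to 1 when the
# policy transition looks like the mocap data and is clamped to 0 otherwise,
# then scaled by amp_reward_coef.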
reward = self.amp_reward_coef * torch.clamp(1 - (1/4) * torch.square(d - 1), min=0)
amp_reward = reward.clone()
if self.task_reward_lerp > 0:
reward = self._lerp_reward(reward, task_reward.unsqueeze(-1))
weighted_style_reward = (1.0 - self.task_reward_lerp) * amp_reward.squeeze()
weighted_task_reward = self.task_reward_lerp * task_reward.squeeze()
self.train()
return reward.squeeze(), weighted_style_reward, weighted_task_reward
def _lerp_reward(self, disc_r, task_r):
r = (1.0 - self.task_reward_lerp) * disc_r + self.task_reward_lerp * task_r
return r
\ No newline at end of file
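# Sketch of standalone AMPDiscriminator usage; the observation size, batch
# size and hyper-parameters below are illustrative, not the values used by
# this commit's training configs.
def _amp_discriminator_example(device="cpu"):
    obs_dim = 30                                    # assumed per-step AMP observation size
    disc = AMPDiscriminator(
        input_dim=obs_dim * 2,                      # discriminator sees (s, s') pairs
        amp_reward_coef=2.0,
        hidden_layer_sizes=[1024, 512],
        device=device,
        task_reward_lerp=0.3)
    s = torch.randn(8, obs_dim)
    s_next = torch.randn(8, obs_dim)
    task_reward = torch.randn(8)
    # Blended reward plus its style/task components, as used by the runner below.
    return disc.predict_amp_reward(s, s_next, task_reward, normalizer=None)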
......@@ -41,7 +41,9 @@ from .dh_ppo import DHPPO
from .actor_critic_dh import ActorCriticDH
from humanoid.algo.vec_env import VecEnv
from torch.utils.tensorboard import SummaryWriter
from ..loadamp.motion_loader import AMPLoader
from ..loadamp.amp_utils import Normalizer
from .amp_discriminator import AMPDiscriminator
class DHOnPolicyRunner:
def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"):
......@@ -70,8 +72,24 @@ class DHOnPolicyRunner:
self.env.num_short_obs, self.env.num_single_obs, num_critic_obs, self.env.num_actions, **self.policy_cfg
).to(self.device)
amp_data = AMPLoader(
device,
motion_files=self.cfg["amp_motion_files"],
time_between_frames=self.env.dt,
preload_transitions=True,
num_preload_transitions=train_cfg['runner']['amp_num_preload_transitions'])
amp_normalizer = Normalizer(amp_data.observation_dim)
discriminator = AMPDiscriminator(
amp_data.observation_dim * 2,
train_cfg['runner']['amp_reward_coef'],
train_cfg['runner']['amp_discr_hidden_dims'], device,
train_cfg['runner']['amp_task_reward_lerp']).to(self.device)
min_std = (
torch.tensor(self.cfg["min_normalized_std"], device=self.device) *
(torch.abs(self.env.dof_pos_limits[:, 1] - self.env.dof_pos_limits[:, 0])))
alg_class = eval(self.cfg["algorithm_class_name"]) # PPO
self.alg: DHPPO = alg_class(actor_critic, device=self.device, **self.alg_cfg)
self.alg: DHPPO = alg_class(actor_critic, discriminator, amp_data, amp_normalizer, min_std, device=self.device, **self.alg_cfg)
self.num_steps_per_env = self.cfg["num_steps_per_env"]
self.save_interval = self.cfg["save_interval"]
......@@ -106,17 +124,26 @@ class DHOnPolicyRunner:
privileged_obs = self.env.get_privileged_observations()
critic_obs = privileged_obs if privileged_obs is not None else obs
obs, critic_obs = obs.to(self.device), critic_obs.to(self.device)
self.alg.actor_critic.train() # switch to train mode (for dropout for example)
amp_obs = self.env.get_amp_observations()
amp_obs = amp_obs.to(self.device)
self.alg.actor_critic.train() # switch to train mode (for dropout for example)
self.alg.discriminator.train()
ep_infos = []
rewbuffer = deque(maxlen=100)
lenbuffer = deque(maxlen=100)
amp_rewbuffer = deque(maxlen=100)
task_rewbuffer = deque(maxlen=100)
cur_reward_sum = torch.zeros(
self.env.num_envs, dtype=torch.float, device=self.device
)
cur_episode_length = torch.zeros(
self.env.num_envs, dtype=torch.float, device=self.device
)
cur_amp_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
cur_task_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
tot_iter = self.current_learning_iteration + num_learning_iterations
......@@ -128,30 +155,49 @@ class DHOnPolicyRunner:
# Rollout
with torch.inference_mode():
for i in range(self.num_steps_per_env):
actions = self.alg.act(obs, critic_obs)
obs, privileged_obs, rewards, dones, infos = self.env.step(actions)
actions = self.alg.act(obs, critic_obs, amp_obs)
obs, privileged_obs, rewards, dones, infos, reset_env_ids, terminal_amp_states = self.env.step(actions)
critic_obs = privileged_obs if privileged_obs is not None else obs
next_amp_obs = self.env.get_amp_observations()
obs, critic_obs, rewards, dones = (
obs.to(self.device),
critic_obs.to(self.device),
rewards.to(self.device),
dones.to(self.device),
)
self.alg.process_env_step(rewards, dones, infos)
next_amp_obs = next_amp_obs.to(self.device)
# Account for terminal states.
next_amp_obs_with_term = torch.clone(next_amp_obs)
next_amp_obs_with_term[reset_env_ids] = terminal_amp_states
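# The discriminator turns (amp_obs, next_amp_obs) into a style reward and
# blends it with the environment's task reward according to task_reward_lerp;
# the two components are also returned separately for logging.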
rewards, amp_reward, task_reward = self.alg.discriminator.predict_amp_reward(amp_obs, next_amp_obs_with_term, rewards, normalizer=self.alg.amp_normalizer)
amp_obs = torch.clone(next_amp_obs)
self.alg.process_env_step(rewards, dones, infos, next_amp_obs_with_term)
if self.log_dir is not None:
# Book keeping
if "episode" in infos:
ep_infos.append(infos["episode"])
cur_reward_sum += rewards
cur_amp_reward_sum += amp_reward
cur_task_reward_sum += task_reward
cur_episode_length += 1
new_ids = (dones > 0).nonzero(as_tuple=False)
rewbuffer.extend(
cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist()
)
amp_rewbuffer.extend(
cur_amp_reward_sum[new_ids][:, 0].cpu().numpy().tolist()
)
task_rewbuffer.extend(
cur_task_reward_sum[new_ids][:, 0].cpu().numpy().tolist()
)
lenbuffer.extend(
cur_episode_length[new_ids][:, 0].cpu().numpy().tolist()
)
cur_amp_reward_sum[new_ids] = 0
cur_task_reward_sum[new_ids] = 0
cur_reward_sum[new_ids] = 0
cur_episode_length[new_ids] = 0
......@@ -162,7 +208,7 @@ class DHOnPolicyRunner:
start = stop
self.alg.compute_returns(critic_obs)
mean_value_loss, mean_surrogate_loss, mean_state_estimator_loss = self.alg.update()
mean_value_loss, mean_surrogate_loss, mean_state_estimator_loss, mean_amp_loss, mean_grad_pen_loss, mean_policy_pred, mean_expert_pred = self.alg.update()
stop = time.time()
learn_time = stop - start
if self.log_dir is not None:
......@@ -213,6 +259,8 @@ class DHOnPolicyRunner:
self.writer.add_scalar(
"Loss/state_estimator", locs["mean_state_estimator_loss"], locs["it"]
)
self.writer.add_scalar('Loss/AMP', locs['mean_amp_loss'], locs['it'])
self.writer.add_scalar('Loss/AMP_grad', locs['mean_grad_pen_loss'], locs['it'])
self.writer.add_scalar("Loss/learning_rate", self.alg.learning_rate, locs["it"])
self.writer.add_scalar("Policy/mean_noise_std", mean_std.item(), locs["it"])
self.writer.add_scalar("Perf/total_fps", fps, locs["it"])
......@@ -224,6 +272,8 @@ class DHOnPolicyRunner:
self.writer.add_scalar(
"Train/mean_reward", statistics.mean(locs["rewbuffer"]), locs["it"]
)
self.writer.add_scalar('Train/amp_reward', statistics.mean(locs['amp_rewbuffer']), locs['it'])
self.writer.add_scalar('Train/task_reward', statistics.mean(locs['task_rewbuffer']), locs['it'])
self.writer.add_scalar(
"Train/mean_episode_length",
statistics.mean(locs["lenbuffer"]),
......@@ -256,8 +306,15 @@ class DHOnPolicyRunner:
f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"""
f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"""
f"""{'State estimator loss:':>{pad}} {locs['mean_state_estimator_loss']:.4f}\n"""
f"""{'AMP loss:':>{pad}} {locs['mean_amp_loss']:.4f}\n"""
f"""{'AMP grad pen loss:':>{pad}} {locs['mean_grad_pen_loss']:.4f}\n"""
f"""{'AMP mean policy pred:':>{pad}} {locs['mean_policy_pred']:.4f}\n"""
f"""{'AMP mean expert pred:':>{pad}} {locs['mean_expert_pred']:.4f}\n"""
f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
f"""{'Mean amp reward:':>{pad}} {statistics.mean(locs['amp_rewbuffer']):.2f}\n"""
f"""{'Mean task reward:':>{pad}} {statistics.mean(locs['task_rewbuffer']):.2f}\n"""
f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n"""
)
# f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
......@@ -294,6 +351,8 @@ class DHOnPolicyRunner:
"es_optimizer_state_dict": self.alg.state_estimator_optimizer.state_dict(),
"iter": self.it,
"infos": infos,
'discriminator_state_dict': self.alg.discriminator.state_dict(),
'amp_normalizer': self.alg.amp_normalizer,
},
path,
)
......@@ -305,6 +364,8 @@ class DHOnPolicyRunner:
self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
self.alg.state_estimator_optimizer.load_state_dict(loaded_dict["es_optimizer_state_dict"])
self.current_learning_iteration = loaded_dict["iter"]
self.alg.discriminator.load_state_dict(loaded_dict['discriminator_state_dict'])
self.alg.amp_normalizer = loaded_dict['amp_normalizer']
return loaded_dict["infos"]
def get_inference_policy(self, device=None):
......
......@@ -36,11 +36,17 @@ import torch.optim as optim
from .actor_critic_dh import ActorCriticDH
from .rollout_storage import RolloutStorage
from .replay_buffer import ReplayBuffer
class DHPPO:
actor_critic: ActorCriticDH
def __init__(self,
actor_critic,
discriminator,
amp_data,
amp_normalizer,
min_std=None,
amp_replay_buffer_size=100000,
num_learning_epochs=1,
num_mini_batches=1,
clip_param=0.2,
......@@ -62,13 +68,28 @@ class DHPPO:
self.desired_kl = desired_kl
self.schedule = schedule
self.learning_rate = learning_rate
self.min_std = min_std
# Discriminator components
self.discriminator = discriminator
self.discriminator.to(self.device)
self.amp_transition = RolloutStorage.Transition()
self.amp_storage = ReplayBuffer(
discriminator.input_dim // 2, amp_replay_buffer_size, device)
self.amp_data = amp_data
self.amp_normalizer = amp_normalizer
# PPO components
self.actor_critic: ActorCriticDH = actor_critic
self.actor_critic.to(self.device)
self.storage = None # initialized later
self.optimizer = optim.Adam(self.actor_critic.parameters(),
lr=learning_rate)
params = [
{'params': self.actor_critic.parameters(), 'name': 'actor_critic'},
{'params': self.discriminator.trunk.parameters(),
'weight_decay': 10e-4, 'name': 'amp_trunk'},
{'params': self.discriminator.amp_linear.parameters(),
'weight_decay': 10e-2, 'name': 'amp_head'}]
self.optimizer = optim.Adam(params, lr=learning_rate)
self.state_estimator_optimizer = optim.Adam(self.actor_critic.state_estimator.parameters(),
lr=learning_rate)
self.transition = RolloutStorage.Transition()
......@@ -95,7 +116,7 @@ class DHPPO:
def train_mode(self):
self.actor_critic.train()
def act(self, obs, critic_obs):
def act(self, obs, critic_obs, amp_obs):
# Compute the actions and values
self.transition.actions = self.actor_critic.act(obs).detach()
self.transition.values = self.actor_critic.evaluate(critic_obs).detach()
......@@ -105,9 +126,11 @@ class DHPPO:
# need to record obs and critic_obs before env.step()
self.transition.observations = obs
self.transition.critic_observations = critic_obs
self.amp_transition.observations = amp_obs
return self.transition.actions
def process_env_step(self, rewards, dones, infos):
def process_env_step(self, rewards, dones, infos, amp_obs):
self.transition.rewards = rewards.clone()
self.transition.dones = dones
# Bootstrapping on time outs
......@@ -115,8 +138,10 @@ class DHPPO:
self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1)
# Record the transition
self.amp_storage.insert(self.amp_transition.observations, amp_obs)
self.storage.add_transitions(self.transition)
self.transition.clear()
self.amp_transition.clear()
self.actor_critic.reset(dones)
def compute_returns(self, last_critic_obs):
......@@ -128,9 +153,22 @@ class DHPPO:
mean_surrogate_loss = 0
mean_state_estimator_loss = 0
mean_amp_loss = 0
mean_grad_pen_loss = 0
mean_policy_pred = 0
mean_expert_pred = 0
generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch in generator:
amp_policy_generator = self.amp_storage.feed_forward_generator(
self.num_learning_epochs * self.num_mini_batches,
self.storage.num_envs * self.storage.num_transitions_per_env //
self.num_mini_batches)
amp_expert_generator = self.amp_data.feed_forward_generator(
self.num_learning_epochs * self.num_mini_batches,
self.storage.num_envs * self.storage.num_transitions_per_env //
self.num_mini_batches)
for (obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch), sample_amp_policy, sample_amp_expert in zip(generator, amp_policy_generator, amp_expert_generator):
self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0])
state_estimator_input = obs_batch[:,-self.num_short_obs:]
......@@ -174,10 +212,34 @@ class DHPPO:
else:
value_loss = (returns_batch - value_batch).pow(2).mean()
# Discriminator loss.
policy_state, policy_next_state = sample_amp_policy
expert_state, expert_next_state = sample_amp_expert
policy_state_unnorm = torch.clone(policy_state)
expert_state_unnorm = torch.clone(expert_state)
if self.amp_normalizer is not None:
with torch.no_grad():
policy_state = self.amp_normalizer.normalize_torch(policy_state, self.device)
policy_next_state = self.amp_normalizer.normalize_torch(policy_next_state, self.device)
expert_state = self.amp_normalizer.normalize_torch(expert_state, self.device)
expert_next_state = self.amp_normalizer.normalize_torch(expert_next_state, self.device)
policy_d = self.discriminator(torch.cat([policy_state, policy_next_state], dim=-1))
expert_d = self.discriminator(torch.cat([expert_state, expert_next_state], dim=-1))
expert_loss = torch.nn.MSELoss()(
expert_d, torch.ones(expert_d.size(), device=self.device))
policy_loss = torch.nn.MSELoss()(
policy_d, -1 * torch.ones(policy_d.size(), device=self.device))
amp_loss = 0.5 * (expert_loss + policy_loss)
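# Least-squares discriminator objective: expert transitions are regressed
# toward +1 and policy transitions toward -1; the gradient penalty below
# penalizes the discriminator's input gradient at the expert samples.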
grad_pen_loss = self.discriminator.compute_grad_pen(
expert_state, expert_next_state, lambda_=10)
# update all actor_critic.parameters()
loss = (surrogate_loss +
self.value_loss_coef * value_loss -
self.entropy_coef * entropy_batch.mean() +
self.entropy_coef * entropy_batch.mean() + amp_loss + grad_pen_loss +
torch.nn.MSELoss()(est_lin_vel, ref_lin_vel))
# Gradient step
......@@ -186,16 +248,31 @@ class DHPPO:
nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm)
self.optimizer.step()
self.actor_critic.std.data = self.actor_critic.std.data.clamp(min=self.min_std)  # yzh comment: may not be necessary
if self.amp_normalizer is not None:
self.amp_normalizer.update(policy_state_unnorm.cpu().numpy())
self.amp_normalizer.update(expert_state_unnorm.cpu().numpy())
state_estimator_loss = torch.nn.MSELoss()(est_lin_vel, ref_lin_vel)
mean_value_loss += value_loss.item()
mean_surrogate_loss += surrogate_loss.item()
mean_state_estimator_loss += state_estimator_loss.item()
mean_amp_loss += amp_loss.item()
mean_grad_pen_loss += grad_pen_loss.item()
mean_policy_pred += policy_d.mean().item()
mean_expert_pred += expert_d.mean().item()
num_updates = self.num_learning_epochs * self.num_mini_batches
mean_value_loss /= num_updates
mean_surrogate_loss /= num_updates
mean_state_estimator_loss /= num_updates
mean_amp_loss /= num_updates
mean_grad_pen_loss /= num_updates
mean_policy_pred /= num_updates
mean_expert_pred /= num_updates
self.storage.clear()
return mean_value_loss, mean_surrogate_loss, mean_state_estimator_loss
return mean_value_loss, mean_surrogate_loss, mean_state_estimator_loss, mean_amp_loss, mean_grad_pen_loss, mean_policy_pred, mean_expert_pred
import torch
import numpy as np
class ReplayBuffer:
"""Fixed-size buffer to store experience tuples."""
def __init__(self, obs_dim, buffer_size, device):
"""Initialize a ReplayBuffer object.
Arguments:
buffer_size (int): maximum size of buffer
"""
self.states = torch.zeros(buffer_size, obs_dim).to(device)
self.next_states = torch.zeros(buffer_size, obs_dim).to(device)
self.buffer_size = buffer_size
self.device = device
self.step = 0
self.num_samples = 0
def insert(self, states, next_states):
"""Add new states to memory."""
num_states = states.shape[0]
start_idx = self.step
end_idx = self.step + num_states
if end_idx > self.buffer_size:
self.states[self.step:self.buffer_size] = states[:self.buffer_size - self.step]
self.next_states[self.step:self.buffer_size] = next_states[:self.buffer_size - self.step]
self.states[:end_idx - self.buffer_size] = states[self.buffer_size - self.step:]
self.next_states[:end_idx - self.buffer_size] = next_states[self.buffer_size - self.step:]
else:
self.states[start_idx:end_idx] = states
self.next_states[start_idx:end_idx] = next_states
self.num_samples = min(self.buffer_size, max(end_idx, self.num_samples))
self.step = (self.step + num_states) % self.buffer_size
def feed_forward_generator(self, num_mini_batch, mini_batch_size):
for _ in range(num_mini_batch):
sample_idxs = np.random.choice(self.num_samples, size=mini_batch_size)
yield (self.states[sample_idxs].to(self.device),
self.next_states[sample_idxs].to(self.device))
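# Sketch of ReplayBuffer usage in the AMP pipeline: store (state, next_state)
# pairs produced by the policy, then sample mini-batches for the
# discriminator. Sizes are illustrative.
def _replay_buffer_example(device="cpu"):
    obs_dim, num_envs = 30, 16
    buffer = ReplayBuffer(obs_dim, buffer_size=1000, device=device)
    for _ in range(5):                              # a few fake rollout steps
        buffer.insert(torch.randn(num_envs, obs_dim, device=device),
                      torch.randn(num_envs, obs_dim, device=device))
    batches = list(buffer.feed_forward_generator(num_mini_batch=2,
                                                 mini_batch_size=32))
    return buffer.num_samples, batches              # 80 stored transitions, 2 batches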
......@@ -46,7 +46,9 @@ from humanoid.utils.math import quat_apply_yaw, wrap_to_pi, torch_rand_sqrt_floa
from humanoid.utils.helpers import class_to_dict
from .legged_robot_config import LeggedRobotCfg
from humanoid.utils.terrain import Terrain
from humanoid.algo.loadamp.motion_loader import AMPLoader
import humanoid.utils.kinematics.urdf as pk
import glob
def copysign_new(a, b):
a = torch.tensor(a, device=b.device, dtype=torch.float)
......@@ -102,11 +104,22 @@ class LeggedRobot(BaseTask):
self.init_done = False
self._parse_cfg(self.cfg)
super().__init__(self.cfg, sim_params, physics_engine, sim_device, headless)
self.chain_ee = []
TEMP_URDF = '/home/bxi/Documents/agibot_x1_train_bak/datasets/bot_elf_tempurdf/urdf/bot_elf.urdf'  # temporary hard-coded URDF path
for ee_name in self.cfg.env.ee_names:
with open(TEMP_URDF, "rb") as urdf_file:
# with open(self.cfg.asset.file, "rb") as urdf_file:
urdf_content = urdf_file.read()
chain_ee_instance = pk.build_serial_chain_from_urdf(urdf_content, ee_name).to(device=self.device)
self.chain_ee.append(chain_ee_instance)
if not self.headless:
self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat)
self._init_buffers()
self._prepare_reward_function()
self.init_done = True
# print("motion_files dir: ")
# print(self.cfg.env.amp_motion_files)
self.amp_loader = AMPLoader(motion_files=self.cfg.env.amp_motion_files, device=self.device, time_between_frames=self.dt)
def step(self, actions):
""" Apply actions, simulate, call self.post_physics_step()
......@@ -145,19 +158,19 @@ class LeggedRobot(BaseTask):
self.imu_lag_buffer[:,:,1:] = self.imu_lag_buffer[:,:,:self.cfg.domain_rand.imu_lag_timesteps_range[1]].clone()
self.imu_lag_buffer[:,:,0] = torch.cat((self.base_ang_vel, self.base_euler_xyz ), 1).clone()
self.post_physics_step()
reset_env_ids, terminal_amp_states = self.post_physics_step()
# return clipped obs, clipped states (None), rewards, dones and infos
clip_obs = self.cfg.normalization.clip_observations
self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs)
if self.privileged_obs_buf is not None:
self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs)
return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras
return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras, reset_env_ids, terminal_amp_states
def reset(self):
""" Reset all robots"""
self.reset_idx(torch.arange(self.num_envs, device=self.device))
obs, privileged_obs, _, _, _ = self.step(torch.zeros(
obs, privileged_obs, _, _, _, _, _ = self.step(torch.zeros(
self.num_envs, self.num_actions, device=self.device, requires_grad=False))
return obs, privileged_obs
......@@ -188,6 +201,7 @@ class LeggedRobot(BaseTask):
self.check_termination()
self.compute_reward()
env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
terminal_amp_states = self.get_amp_observations()[env_ids]
self.reset_idx(env_ids)
self.compute_observations() # in some cases a simulation step might be required to refresh some obs (for example body positions)
......@@ -203,6 +217,8 @@ class LeggedRobot(BaseTask):
if self.viewer and self.enable_viewer_sync and self.debug_viz:
self._draw_debug_vis()
return env_ids, terminal_amp_states
def check_termination(self):
""" Check if environments need to be reset
"""
......@@ -238,11 +254,14 @@ class LeggedRobot(BaseTask):
self.update_command_curriculum(env_ids)
# reset rand dof_pos and dof_vel=0
self._reset_dofs(env_ids)
# reset base position
if self.cfg.env.reference_state_initialization:
frames = self.amp_loader.get_full_frame_batch(len(env_ids))
self._reset_dofs_amp(env_ids, frames)
self._reset_root_states_amp(env_ids, frames)
else:
self._reset_dofs(env_ids)
self._reset_root_states(env_ids)
# sample cmd based on self.command_ranges
self._resample_commands(env_ids)
......@@ -757,6 +776,25 @@ class LeggedRobot(BaseTask):
self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _reset_dofs_amp(self, env_ids, frames):
""" Resets DOF position and velocities of selected environmments
Positions are randomly selected within 0.5:1.5 x default positions.
Velocities are set to zero.
Args:
env_ids (List[int]): Environemnt ids
frames: AMP frames to initialize motion with
"""
self.dof_pos[env_ids] = AMPLoader.get_joint_pose_batch(frames)
self.dof_vel[env_ids] = AMPLoader.get_joint_vel_batch(frames)
self.dof_pos[env_ids] = self.amp_loader.reorder_from_mujuco_to_isaacgym_tool(self.dof_pos[env_ids])
self.dof_vel[env_ids] = self.amp_loader.reorder_from_mujuco_to_isaacgym_tool(self.dof_vel[env_ids])
self.dof_pos[env_ids] = self.dof_pos[env_ids].clip(self.dof_pos_limits[:,0],self.dof_pos_limits[:,1])
self.dof_vel[env_ids] = self.dof_vel[env_ids].clip(-self.dof_vel_limits, self.dof_vel_limits)  # symmetric velocity limits
env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _reset_root_states(self, env_ids):
""" Resets ROOT states position and velocities of selected environmments
......@@ -787,7 +825,27 @@ class LeggedRobot(BaseTask):
self.gym.set_actor_root_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _reset_root_states_amp(self, env_ids, frames):
""" Resets ROOT states position and velocities of selected environmments
Sets base position based on the curriculum
Selects randomized base velocities within -0.5:0.5 [m/s, rad/s]
Args:
env_ids (List[int]): Environemnt ids
"""
# base position
root_pos = AMPLoader.get_root_pos_batch(frames)
root_pos[:, :2] = root_pos[:, :2] + self.env_origins[env_ids, :2]
self.root_states[env_ids, :3] = root_pos
# base velocities
root_orn = AMPLoader.get_root_rot_batch(frames)
self.root_states[env_ids, 3:7] = root_orn
self.root_states[env_ids, 7:10] = quat_rotate(root_orn, AMPLoader.get_linear_vel_batch(frames))
self.root_states[env_ids, 10:13] = quat_rotate(root_orn, AMPLoader.get_angular_vel_batch(frames))
env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_actor_root_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _push_robots(self):
""" Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
"""
......