
Training a Humanoid AI Robot Using Custom Models


2 Oct 2020

CPOL

3 min read


In this series of articles, we dig into customization: editing the humanoid's XML-based model, and then training the result.

Creating the Adjusted Model: Longer Arms

PyBullet loads the environments we have been using from XML files, which gives us the opportunity to adjust that XML.

By looking at the XML file that defines the humanoid, we can see how it is built up from simpler components.
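
If you want a quick overview of that structure before editing anything, the following standalone sketch (my own addition, not part of the article's code) parses the same XML file and prints each body together with the geoms it contains:

import xml.etree.ElementTree as ET
from pathlib import Path

import pybullet_data

# locate the bundled MJCF model that PyBullet ships with
model_path = Path(pybullet_data.getDataPath()) / "mjcf" / "humanoid_symmetric.xml"
tree = ET.parse(model_path)

# print each body and the names of the geoms attached to it
for body in tree.iter("body"):
    geom_names = [geom.get("name") for geom in body.findall("geom")]
    print(body.get("name"), "->", geom_names)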

Specifically, let's adjust the humanoid model so that it has longer arms!

The following code does not do any training. It simply makes the change we are interested in and displays the result. If you are working only in an SSH shell you can skip this part; otherwise, you should see the humanoid rendered with longer arms.

import xml.etree.ElementTree as ET
from tempfile import NamedTemporaryFile
from time import sleep
from pathlib import Path

import pybullet_data
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv
from pybullet_envs.robot_locomotors import Humanoid, WalkerBase


assets_path = Path(pybullet_data.getDataPath()) / "mjcf"
assert assets_path.exists()

humanoid_model_path = assets_path / "humanoid_symmetric.xml"
assert humanoid_model_path.exists()

tree = ET.parse(humanoid_model_path)


def find_elem(node_type, name):
    nodes = tree.iter(node_type)
    for node in nodes:
        if node.get("name") == name:
            return node
    assert False


# make the lower arms longer than usual
for side in ("left", "right"):
    lower_arm = find_elem("geom", f"{side}_larm")

    # was "0.01 0.01 0.01 .17 .17 .17"
    lower_arm.set("fromto", "0.01 0.01 0.01 0.34 0.34 0.34")

    hand = find_elem("geom", f"{side}_hand")
    hand.set("pos", ".35 .35 .35")  # was ".18 .18 .18"


with NamedTemporaryFile() as temp_file:
    tree.write(temp_file)

    class CustomHumanoid(Humanoid):
        def __init__(self):
            # really we should call super().__init__(), but it doesn't
            # let us override these things
            WalkerBase.__init__(self,
                                temp_file.name,
                                'torso',
                                action_dim=17,
                                obs_dim=44,
                                power=0.41)


    class CustomHumanoidBulletEnv(HumanoidBulletEnv):
        def __init__(self, render=False):
            super().__init__(robot=CustomHumanoid(), render=render)

    env = CustomHumanoidBulletEnv()
    env.render(mode="human")
    env.reset()
    while True:
        # keep the process alive so that the environment is still shown
        # Ctrl+C to exit
        sleep(1)

The result should look something like this:

Training with Long Lower Arms

Code

Here is the code I used to train an agent with long arms to walk. Note that I made the arms slightly longer than in the code above, to try to encourage the agent to use them for walking.

import xml.etree.ElementTree as ET
from tempfile import NamedTemporaryFile
from pathlib import Path

import pybullet
import pybullet_data
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv
from pybullet_envs.robot_locomotors import Humanoid, WalkerBase
from ray.rllib.agents.sac import SACTrainer
from ray.tune import register_env, tune

assets_path = Path(pybullet_data.getDataPath()) / "mjcf"
assert assets_path.exists()

humanoid_model_path = assets_path / "humanoid_symmetric.xml"
assert humanoid_model_path.exists()

tree = ET.parse(humanoid_model_path)


def find_elem(node_type, name):
    nodes = tree.iter(node_type)
    for node in nodes:
        if node.get("name") == name:
            return node
    assert False


# make the lower arms longer than usual
for side in ("left", "right"):
    lower_arm = find_elem("geom", f"{side}_larm")
    # was "0.01 0.01 0.01 .17 .17 .17"
    lower_arm.set("fromto", "0.01 0.01 0.01 0.42 0.42 0.42")

    hand = find_elem("geom", f"{side}_hand")
    hand.set("pos", ".43 .43 .43")  # was ".18 .18 .18"


with NamedTemporaryFile() as temp_file:
    tree.write(temp_file)

    # grab the file name string; passing the temp_file object itself into
    # functions would cause serialisation problems
    temp_file_name = temp_file.name

    class CustomHumanoid(Humanoid):
        # unlike Humanoid, we treat the hands as extra feet! We use the lower arms
        # because these need to be body elements not geoms
        foot_list = ["right_foot", "left_foot", "left_lower_arm", "right_lower_arm"]

        def __init__(self):
            # really we should call super().__init__(), but it doesn't let us
            # override these things
            WalkerBase.__init__(self,
                                temp_file_name,
                                'torso',
                                action_dim=17,
                                obs_dim=44 + 2,  # +2 for hand contact
                                power=0.41)

        # the MJFCBasedRobot's reset is very specific about where it wants to load
        # the XML from so we do a nasty override here, simplifying the base class
        # logic and using temp_file_name
        def reset(self, bullet_client):
            self._p = bullet_client
            if self.doneLoading == 0:
                self.ordered_joints = []
                self.doneLoading = 1
                self.objects = self._p.loadMJCF(
                    temp_file_name,
                    flags=pybullet.URDF_USE_SELF_COLLISION |
                          pybullet.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS |
                          pybullet.URDF_GOOGLEY_UNDEFINED_COLORS)
                self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(
                    self._p, self.objects)
            self.robot_specific_reset(self._p)

            s = self.calc_state()
            return s

    class CustomHumanoidBulletEnv(HumanoidBulletEnv):
        def __init__(self, render=False):
            super().__init__(robot=CustomHumanoid(), render=render)

    ENV = 'HumanoidBulletEnvLongArms-v0'

    def make_env(*args, **kwargs):
        return CustomHumanoidBulletEnv()

    register_env(ENV, make_env)

    TARGET_REWARD = 6000
    TRAINER = SACTrainer

    tune.run(
        TRAINER,
        stop={"episode_reward_mean": TARGET_REWARD},
        config={
            "env": ENV,
            "num_workers": 7,
            "num_gpus": 1,
            "monitor": True,
            "evaluation_num_episodes": 50,
        }
    )

Video

Here is the best video of the progress. As you can see, the agent learned to walk. It did not try to use its arms for balance; in fact, they turned out to be a nuisance it had to keep balanced.

Training with Gorilla Arms

Changes

Because it was simpler to implement, the previous code only changed the length of the lower arms. That did not seem to be enough to encourage the agent to use its arms to help it walk.

Instead, I tried changing both the upper and lower arms to roughly the same proportions as a gorilla, in the hope of getting more ape-like locomotion. Gorillas are also heavy, and I did not change the weight distribution, so that is something you might like to experiment with; see the sketch that follows.
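
If you do want to experiment with the mass, one simple approach is to edit the arm geoms in the same XML tree before writing it out. This is only a sketch of the idea, reusing the find_elem helper from the code below and assuming the loader honours the MJCF size and density attributes; the values are illustrative, not tuned:

# Sketch only: thicken the lower-arm capsules and increase their density so the
# arms carry more of the body's mass.
for side in ("left", "right"):
    lower_arm = find_elem("geom", f"{side}_larm")
    lower_arm.set("size", "0.06")      # capsule radius; larger than the original
    lower_arm.set("density", "1500")   # MJCF's default geom density is 1000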

I also found that I had to reduce the electricity cost deducted from the agent's reward (see "Scrapped First Attempt" below). In addition, I used the SAC learning parameters we applied to the humanoid earlier, rather than relying on the defaults.
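
If you want to see exactly what those overrides change, you can print RLlib's stock SAC settings first. This is a small sketch that assumes the installed Ray version exposes DEFAULT_CONFIG in ray.rllib.agents.sac, as the releases used in this series did:

import pprint

from ray.rllib.agents.sac import DEFAULT_CONFIG

# print the default values of the settings that the training config below overrides
keys = ("optimization", "train_batch_size", "target_network_update_freq",
        "learning_starts", "buffer_size", "observation_filter")
pprint.pprint({k: DEFAULT_CONFIG.get(k) for k in keys})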

Code

Here is the code for training an agent with the different arm proportions.

import xml.etree.ElementTree as ET
from tempfile import NamedTemporaryFile
from pathlib import Path

import pybullet
import pybullet_data
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv
from pybullet_envs.robot_locomotors import Humanoid, WalkerBase
from ray.rllib.agents.sac import SACTrainer
from ray.tune import register_env, tune

assets_path = Path(pybullet_data.getDataPath()) / "mjcf"
assert assets_path.exists()

humanoid_model_path = assets_path / "humanoid_symmetric.xml"
assert humanoid_model_path.exists()

tree = ET.parse(humanoid_model_path)


def find_elem(node_type, name):
    nodes = tree.iter(node_type)
    for node in nodes:
        if node.get("name") == name:
            return node
    assert False


# make the arms longer than usual
for side in ("left", "right"):
    upper_arm = find_elem("geom", f"{side}_uarm1")
    upper_arm.set("fromto", "0 0 0 .36 .36 .36")  # was "0 0 0 .16 .16 .16"

    lower_arm_body = find_elem("body", f"{side}_lower_arm")
    lower_arm_body.set("pos", ".37 .37 .37")  # was ".18 -.18 -.18"

    lower_arm = find_elem("geom", f"{side}_larm")
    # was "0.01 0.01 0.01 .17 .17 .17"
    lower_arm.set("fromto", ".01 .01 .01 .3 .3 -.3")

    hand = find_elem("geom", f"{side}_hand")
    hand.set("pos", ".31 .31 -.31")  # was ".18 .18 .18"

    # wrap the hand geoms in body objects, like the feet already have
    hand_body = ET.Element("body", {"name": f"{side}_hand"})
    lower_arm_body.remove(hand)
    hand_body.append(hand)
    lower_arm_body.append(hand_body)


with NamedTemporaryFile() as temp_file:
    tree.write(temp_file)

    # grab the file name string; passing the temp_file object itself into
    # functions would cause serialisation problems
    temp_file_name = temp_file.name

    class CustomHumanoid(Humanoid):
        # unlike Humanoid, we treat the hands as extra feet!
        foot_list = ["right_foot", "left_foot", "left_hand", "right_hand"]

        def __init__(self):
            # really we should call super().__init__(), but it doesn't let us
            # override these things
            WalkerBase.__init__(self,
                                temp_file_name,
                                'torso',
                                action_dim=17,
                                obs_dim=44 + 2,  # +2 for hand contact
                                power=0.41)

        # the MJFCBasedRobot's reset is very specific about where it wants to
        # load the XML from so we do a nasty override here, simplifying the
        # base class logic and using temp_file_name
        def reset(self, bullet_client):
            self._p = bullet_client
            if self.doneLoading == 0:
                self.ordered_joints = []
                self.doneLoading = 1
                self.objects = self._p.loadMJCF(
                    temp_file_name,
                    flags=pybullet.URDF_USE_SELF_COLLISION |
                          pybullet.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS |
                          pybullet.URDF_GOOGLEY_UNDEFINED_COLORS)
                self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(
                    self._p, self.objects)
            self.robot_specific_reset(self._p)

            s = self.calc_state()
            return s

    class CustomHumanoidBulletEnv(HumanoidBulletEnv):
        def __init__(self, render=False):
            super().__init__(robot=CustomHumanoid(), render=render)
            # reduce the electricity cost to encourage walking over standing still
            self.electricity_cost /= 4

    ENV = 'HumanoidBulletEnvLongArms-v1'

    def make_env(*args, **kwargs):
        return CustomHumanoidBulletEnv()

    register_env(ENV, make_env)

    TARGET_REWARD = 100_000
    TRAINER = SACTrainer

    tune.run(
        TRAINER,
        stop={"episode_reward_mean": TARGET_REWARD},
        config={
            "env": ENV,
            "num_workers": 7,
            "num_gpus": 1,
            "monitor": True,
            "evaluation_num_episodes": 50,
            "optimization": {
                "actor_learning_rate": 1e-3,
                "critic_learning_rate": 1e-3,
                "entropy_learning_rate": 3e-4,
            },
            "train_batch_size": 128,
            "target_network_update_freq": 1,
            "learning_starts": 1000,
            "buffer_size": 1_000_000,
            "observation_filter": "MeanStdFilter",
        },
    )

I terminated the process after 45 hours of training. The graph of learning progress looked like this:

Because the reward structure changed (a smaller electricity cost), this graph is not comparable with the earlier ones.

Video

Scrapped First Attempt

Purely out of interest, here is what I got when I first tried this configuration but divided the electricity cost by only 2 instead of 4. The agent simply learned to accumulate reward by balancing on all four limbs rather than trying to move forward.

This was after 18 hours of training.

Lowering the Electricity Cost

Here are the three biggest progress videos that were generated. They are included because each shows clearly different learned behaviour, and they really were the only three progress snapshots that could be considered examples of successful learning.

Series Conclusion

I hope you have enjoyed reading this series of articles on reinforcement learning in continuous control environments.

You have learned the basics of two algorithms capable of learning in continuous action spaces: Proximal Policy Optimization (PPO) and Soft Actor-Critic (SAC). You have seen some of the environments that PyBullet provides: CartPole (discrete and continuous), Hopper, Ant, and Humanoid. Finally, you have seen some of the possibilities for pushing the boundaries: adjusting the environment to change the nature of what the agent is learning, so that it learns to run backwards, and changing the physical model so that the agent has to learn to move with a different body shape.

I had a lot of fun, and I hope you did too. Thank you for joining me on this journey through some strange and wonderful computer-generated environments.
