`ding.policy.command_mode_policy_instance`¶

`ding.policy.command_mode_policy_instance` ¶

`EpsCommandModePolicy` ¶

Bases: CommandModePolicy
`DDPGCommandModePolicy` ¶

Bases: DDPGPolicy, CommandModePolicy
`BCCommandModePolicy` ¶

Bases: BehaviourCloningPolicy, DummyCommandModePolicy
Full Source Code

../ding/policy/command_mode_policy_instance.py
from ding.utils import POLICY_REGISTRYfrom ding.rl_utils import get_epsilon_greedy_fnfrom .base_policy import CommandModePolicyfrom .dqn import DQNPolicy, DQNSTDIMPolicyfrom .mdqn import MDQNPolicyfrom .c51 import C51Policyfrom .qrdqn import QRDQNPolicyfrom .iqn import IQNPolicyfrom .fqf import FQFPolicyfrom .rainbow import RainbowDQNPolicyfrom .r2d2 import R2D2Policyfrom .r2d2_gtrxl import R2D2GTrXLPolicyfrom .r2d2_collect_traj import R2D2CollectTrajPolicyfrom .sqn import SQNPolicyfrom .ppo import PPOPolicy, PPOOffPolicy, PPOPGPolicy, PPOSTDIMPolicyfrom .vmpo import VMPOPolicyfrom .offppo_collect_traj import OffPPOCollectTrajPolicyfrom .ppg import PPGPolicy, PPGOffPolicyfrom .pg import PGPolicyfrom .a2c import A2CPolicyfrom .impala import IMPALAPolicyfrom .ngu import NGUPolicyfrom .ddpg import DDPGPolicyfrom .td3 import TD3Policyfrom .td3_vae import TD3VAEPolicyfrom .td3_bc import TD3BCPolicyfrom .sac import SACPolicy, DiscreteSACPolicy, SQILSACPolicyfrom .mbpolicy.mbsac import MBSACPolicy, STEVESACPolicyfrom .mbpolicy.dreamer import DREAMERPolicyfrom .qmix import QMIXPolicyfrom .wqmix import WQMIXPolicyfrom .collaq import CollaQPolicyfrom .coma import COMAPolicyfrom .atoc import ATOCPolicyfrom .acer import ACERPolicyfrom .qtran import QTRANPolicyfrom .sql import SQLPolicyfrom .bc import BehaviourCloningPolicyfrom .ibc import IBCPolicyfrom .dqfd import DQFDPolicyfrom .r2d3 import R2D3Policyfrom .d4pg import D4PGPolicyfrom .cql import CQLPolicy, DiscreteCQLPolicyfrom .iql import IQLPolicyfrom .dt import DTPolicyfrom .pdqn import PDQNPolicyfrom .madqn import MADQNPolicyfrom .bdq import BDQPolicyfrom .bcq import BCQPolicyfrom .edac import EDACPolicyfrom .prompt_pg import PromptPGPolicyfrom .plan_diffuser import PDPolicyfrom .happo import HAPPOPolicyfrom .prompt_awr import PromptAWRPolicyclass EpsCommandModePolicy(CommandModePolicy):    def _init_command(self) -> None:        r"""        Overview:            Command mode init method. Called by ``self.__init__``.            Set the eps_greedy rule according to the config for command        """        eps_cfg = self._cfg.other.eps        self.epsilon_greedy = get_epsilon_greedy_fn(eps_cfg.start, eps_cfg.end, eps_cfg.decay, eps_cfg.type)    def _get_setting_collect(self, command_info: dict) -> dict:        r"""        Overview:            Collect mode setting information including eps        Arguments:            - command_info (:obj:`dict`): Dict type, including at least ['learner_train_iter', 'collector_envstep']        Returns:           - collect_setting (:obj:`dict`): Including eps in collect mode.        """        # Decay according to `learner_train_iter`        # step = command_info['learner_train_iter']        # Decay according to `envstep`        step = command_info['envstep']        return {'eps': self.epsilon_greedy(step)}    def _get_setting_learn(self, command_info: dict) -> dict:        return {}    def _get_setting_eval(self, command_info: dict) -> dict:        return {}class DummyCommandModePolicy(CommandModePolicy):    def _init_command(self) -> None:        pass    def _get_setting_collect(self, command_info: dict) -> dict:        return {}    def _get_setting_learn(self, command_info: dict) -> dict:        return {}    def _get_setting_eval(self, command_info: dict) -> dict:        return {}@POLICY_REGISTRY.register('bdq_command')class BDQCommandModePolicy(BDQPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('mdqn_command')class MDQNCommandModePolicy(MDQNPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('dqn_command')class DQNCommandModePolicy(DQNPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('dqn_stdim_command')class DQNSTDIMCommandModePolicy(DQNSTDIMPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('dqfd_command')class DQFDCommandModePolicy(DQFDPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('c51_command')class C51CommandModePolicy(C51Policy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('qrdqn_command')class QRDQNCommandModePolicy(QRDQNPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('iqn_command')class IQNCommandModePolicy(IQNPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('fqf_command')class FQFCommandModePolicy(FQFPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('rainbow_command')class RainbowDQNCommandModePolicy(RainbowDQNPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('r2d2_command')class R2D2CommandModePolicy(R2D2Policy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('r2d2_gtrxl_command')class R2D2GTrXLCommandModePolicy(R2D2GTrXLPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('r2d2_collect_traj_command')class R2D2CollectTrajCommandModePolicy(R2D2CollectTrajPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('r2d3_command')class R2D3CommandModePolicy(R2D3Policy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('sqn_command')class SQNCommandModePolicy(SQNPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('sql_command')class SQLCommandModePolicy(SQLPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('ppo_command')class PPOCommandModePolicy(PPOPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('vmpo_command')class VMPOCommandModePolicy(VMPOPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('happo_command')class HAPPOCommandModePolicy(HAPPOPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('ppo_stdim_command')class PPOSTDIMCommandModePolicy(PPOSTDIMPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('ppo_pg_command')class PPOPGCommandModePolicy(PPOPGPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('ppo_offpolicy_command')class PPOOffCommandModePolicy(PPOOffPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('offppo_collect_traj_command')class PPOOffCollectTrajCommandModePolicy(OffPPOCollectTrajPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('pg_command')class PGCommandModePolicy(PGPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('a2c_command')class A2CCommandModePolicy(A2CPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('impala_command')class IMPALACommandModePolicy(IMPALAPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('ppg_offpolicy_command')class PPGOffCommandModePolicy(PPGOffPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('ppg_command')class PPGCommandModePolicy(PPGPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('madqn_command')class MADQNCommandModePolicy(MADQNPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('ddpg_command')class DDPGCommandModePolicy(DDPGPolicy, CommandModePolicy):    def _init_command(self) -> None:        r"""        Overview:            Command mode init method. Called by ``self.__init__``.            If hybrid action space, set the eps_greedy rule according to the config for command,            otherwise, just a empty method        """        if self._cfg.action_space == 'hybrid':            eps_cfg = self._cfg.other.eps            self.epsilon_greedy = get_epsilon_greedy_fn(eps_cfg.start, eps_cfg.end, eps_cfg.decay, eps_cfg.type)    def _get_setting_collect(self, command_info: dict) -> dict:        r"""        Overview:            Collect mode setting information including eps when hybrid action space        Arguments:            - command_info (:obj:`dict`): Dict type, including at least ['learner_step', 'envstep']        Returns:           - collect_setting (:obj:`dict`): Including eps in collect mode.        """        if self._cfg.action_space == 'hybrid':            # Decay according to `learner_step`            # step = command_info['learner_step']            # Decay according to `envstep`            step = command_info['envstep']            return {'eps': self.epsilon_greedy(step)}        else:            return {}    def _get_setting_learn(self, command_info: dict) -> dict:        return {}    def _get_setting_eval(self, command_info: dict) -> dict:        return {}@POLICY_REGISTRY.register('td3_command')class TD3CommandModePolicy(TD3Policy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('td3_vae_command')class TD3VAECommandModePolicy(TD3VAEPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('td3_bc_command')class TD3BCCommandModePolicy(TD3BCPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('sac_command')class SACCommandModePolicy(SACPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('mbsac_command')class MBSACCommandModePolicy(MBSACPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('stevesac_command')class STEVESACCommandModePolicy(STEVESACPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('dreamer_command')class DREAMERCommandModePolicy(DREAMERPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('cql_command')class CQLCommandModePolicy(CQLPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('iql_command')class IQLCommandModePolicy(IQLPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('discrete_cql_command')class DiscreteCQLCommandModePolicy(DiscreteCQLPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('dt_command')class DTCommandModePolicy(DTPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('qmix_command')class QMIXCommandModePolicy(QMIXPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('wqmix_command')class WQMIXCommandModePolicy(WQMIXPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('collaq_command')class CollaQCommandModePolicy(CollaQPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('coma_command')class COMACommandModePolicy(COMAPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('atoc_command')class ATOCCommandModePolicy(ATOCPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('acer_command')class ACERCommandModePolisy(ACERPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('qtran_command')class QTRANCommandModePolicy(QTRANPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('ngu_command')class NGUCommandModePolicy(NGUPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('d4pg_command')class D4PGCommandModePolicy(D4PGPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('pdqn_command')class PDQNCommandModePolicy(PDQNPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('discrete_sac_command')class DiscreteSACCommandModePolicy(DiscreteSACPolicy, EpsCommandModePolicy):    pass@POLICY_REGISTRY.register('sqil_sac_command')class SQILSACCommandModePolicy(SQILSACPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('ibc_command')class IBCCommandModePolicy(IBCPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('bcq_command')class BCQCommandModelPolicy(BCQPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('edac_command')class EDACCommandModelPolicy(EDACPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('pd_command')class PDCommandModelPolicy(PDPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('bc_command')class BCCommandModePolicy(BehaviourCloningPolicy, DummyCommandModePolicy):    def _init_command(self) -> None:        r"""        Overview:            Command mode init method. Called by ``self.__init__``.            Set the eps_greedy rule according to the config for command        """        if self._cfg.continuous:            noise_cfg = self._cfg.collect.noise_sigma            self.epsilon_greedy = get_epsilon_greedy_fn(noise_cfg.start, noise_cfg.end, noise_cfg.decay, noise_cfg.type)        else:            eps_cfg = self._cfg.other.eps            self.epsilon_greedy = get_epsilon_greedy_fn(eps_cfg.start, eps_cfg.end, eps_cfg.decay, eps_cfg.type)    def _get_setting_collect(self, command_info: dict) -> dict:        r"""        Overview:            Collect mode setting information including eps        Arguments:            - command_info (:obj:`dict`): Dict type, including at least ['learner_train_iter', 'collector_envstep']        Returns:           - collect_setting (:obj:`dict`): Including eps in collect mode.        """        if self._cfg.continuous:            # Decay according to `learner_step`            step = command_info['learner_step']            return {'sigma': self.epsilon_greedy(step)}        else:            # Decay according to `envstep`            step = command_info['envstep']            return {'eps': self.epsilon_greedy(step)}    def _get_setting_learn(self, command_info: dict) -> dict:        return {}    def _get_setting_eval(self, command_info: dict) -> dict:        return {}@POLICY_REGISTRY.register('prompt_pg_command')class PromptPGCommandModePolicy(PromptPGPolicy, DummyCommandModePolicy):    pass@POLICY_REGISTRY.register('prompt_awr_command')class PromptAWRCommandModePolicy(PromptAWRPolicy, DummyCommandModePolicy):    pass
ding.policy.command_mode_policy_instance¶

ding.policy.command_mode_policy_instance ¶

EpsCommandModePolicy ¶

DDPGCommandModePolicy ¶

BCCommandModePolicy ¶

Full Source Code

`ding.policy.command_mode_policy_instance`¶

`ding.policy.command_mode_policy_instance` ¶

`EpsCommandModePolicy` ¶

`DDPGCommandModePolicy` ¶

`BCCommandModePolicy` ¶