ding.rl_utils
ding.rl_utils
Full Source Code
../ding/rl_utils/__init__.py
# Package initializer for ``ding.rl_utils``.
#
# This module only re-exports names from its sibling submodules so that callers
# can write ``from ding.rl_utils import <name>`` without knowing which
# submodule defines it. No logic lives here.
#
# The order of the statements below matches the order in which the submodules
# were originally imported; keep it stable in case any submodule relies on
# another having been loaded first.

from .exploration import get_epsilon_greedy_fn, create_noise_generator
from .ppo import (
    ppo_data,
    ppo_loss,
    ppo_info,
    ppo_policy_data,
    ppo_policy_error,
    ppo_value_data,
    ppo_value_error,
    ppo_error,
    ppo_error_continuous,
    ppo_policy_error_continuous,
    ppo_data_continuous,
    ppo_policy_data_continuous,
)
from .happo import (
    happo_data,
    happo_policy_data,
    happo_value_data,
    happo_loss,
    happo_policy_loss,
    happo_info,
    happo_error,
    happo_policy_error,
    happo_value_error,
    happo_error_continuous,
    happo_policy_error_continuous,
)
from .ppg import ppg_data, ppg_joint_loss, ppg_joint_error
from .gae import gae_data, gae
from .a2c import a2c_data, a2c_error, a2c_error_continuous
from .coma import coma_data, coma_error
from .td import (
    q_nstep_td_data,
    q_nstep_td_error,
    q_1step_td_data,
    q_1step_td_error,
    m_q_1step_td_data,
    m_q_1step_td_error,
    td_lambda_data,
    td_lambda_error,
    q_nstep_td_error_with_rescale,
    v_1step_td_data,
    v_1step_td_error,
    v_nstep_td_data,
    v_nstep_td_error,
    generalized_lambda_returns,
    dist_1step_td_data,
    dist_1step_td_error,
    dist_nstep_td_error,
    dist_nstep_td_data,
    nstep_return_data,
    nstep_return,
    iqn_nstep_td_data,
    iqn_nstep_td_error,
    qrdqn_nstep_td_data,
    qrdqn_nstep_td_error,
    fqf_nstep_td_data,
    fqf_nstep_td_error,
    fqf_calculate_fraction_loss,
    evaluate_quantile_at_action,
    q_nstep_sql_td_error,
    dqfd_nstep_td_error,
    dqfd_nstep_td_data,
    q_v_1step_td_error,
    q_v_1step_td_data,
    dqfd_nstep_td_error_with_rescale,
    discount_cumsum,
    bdq_nstep_td_error,
)
# All ``.vtrace`` names are imported in one statement; the original listing
# split them across two separate ``from .vtrace import`` lines.
from .vtrace import (
    vtrace_loss,
    compute_importance_weights,
    vtrace_data,
    vtrace_error_discrete_action,
    vtrace_error_continuous_action,
)
from .upgo import upgo_loss
from .adder import get_gae, get_gae_with_default_last_value, get_nstep_return_data, get_train_sample
from .value_rescale import value_transform, value_inv_transform, symlog, inv_symlog
from .beta_function import beta_function_map
from .retrace import compute_q_retraces
from .acer import acer_policy_error, acer_value_error, acer_trust_region_update
# NOTE(review): ``HybridDeterminsticSampler`` is spelled exactly as it was
# imported originally ("Determinstic"); confirm against ``.sampler`` before
# renaming, since the name is part of the package's public surface.
from .sampler import (
    ArgmaxSampler,
    MultinomialSampler,
    MuSampler,
    ReparameterizationSampler,
    HybridStochasticSampler,
    HybridDeterminsticSampler,
)