ding.config.example.PPOOffPolicy.gym_pongnoframeskip_v4
ding.config.example.PPOOffPolicy.gym_pongnoframeskip_v4
Full Source Code
../ding/config/example/PPOOffPolicy/gym_pongnoframeskip_v4.py
# Example experiment configuration: off-policy PPO on PongNoFrameskip-v4.
#
# The module exposes two names:
#   cfg -- the nested settings below, wrapped in an EasyDict.
#   env -- the `env` attribute taken from ding.envs.gym_env.
from easydict import EasyDict
import ding.envs.gym_env

cfg = EasyDict(
    {
        'exp_name': 'PongNoFrameskip-v4-PPOOffPolicy',
        # Environment settings.
        'env': {
            'collector_env_num': 8,
            'evaluator_env_num': 8,
            'n_evaluator_episode': 8,
            # NOTE(review): Pong scores presumably top out at 21, so this
            # threshold may never trigger an early stop -- confirm intent.
            'stop_value': 30,
            'env_id': 'PongNoFrameskip-v4',
            'frame_stack': 4,
            'env_wrapper': 'atari_default',
        },
        # Policy settings, grouped into model / learn / collect sections.
        'policy': {
            'cuda': True,
            'recompute_adv': True,
            'action_space': 'discrete',
            'model': {
                # Leading dimension equals `frame_stack` in the env section.
                'obs_shape': [4, 84, 84],
                'action_shape': 6,
                'action_space': 'discrete',
                'encoder_hidden_size_list': [64, 64, 128],
                'actor_head_hidden_size': 128,
                'critic_head_hidden_size': 128,
            },
            'learn': {
                'update_per_collect': 10,
                'batch_size': 320,
                'learning_rate': 3e-4,
                'value_weight': 0.5,
                'entropy_weight': 0.001,
                'clip_ratio': 0.2,
                'adv_norm': True,
                # value_norm=True is left disabled in this example.
                'ignore_done': False,
                'grad_clip_type': 'clip_norm',
                'grad_clip_value': 0.5,
            },
            'collect': {
                'n_sample': 3200,
                'unroll_len': 1,
                'discount_factor': 0.99,
                'gae_lambda': 0.95,
            },
        },
        # Weights & Biases logging switches.
        'wandb_logger': {
            'gradient_logger': True,
            'video_logger': True,
            'plot_logger': True,
            'action_logger': True,
            'return_logger': False,
        },
    }
)

# Expose the `env` attribute of ding.envs.gym_env under this module's namespace.
env = ding.envs.gym_env.env