Skip to content

ding.config.example.PPOOffPolicy.gym_lunarlander_v2

ding.config.example.PPOOffPolicy.gym_lunarlander_v2

Full Source Code

../ding/config/example/PPOOffPolicy/gym_lunarlander_v2.py

# Example experiment configuration named 'LunarLander-v2-PPOOffPolicy'.
#
# Module-level names exposed by this file:
#   cfg -- the nested settings below, wrapped in an EasyDict.
#   env -- alias of ``ding.envs.gym_env.env``.
from easydict import EasyDict
import ding.envs.gym_env

cfg = EasyDict({
    'exp_name': 'LunarLander-v2-PPOOffPolicy',
    # Environment section: env id plus collector / evaluator counts.
    'env': {
        'collector_env_num': 8,
        'evaluator_env_num': 8,
        'env_id': 'LunarLander-v2',
        'n_evaluator_episode': 8,
        # NOTE(review): presumably the evaluation return at which training
        # stops -- confirm against the consumer of this config.
        'stop_value': 260,
    },
    # Policy section: model, learn and collect sub-sections.
    'policy': {
        'cuda': True,
        'model': {
            'obs_shape': 8,
            'action_shape': 4,
        },
        'learn': {
            'update_per_collect': 4,
            'batch_size': 64,
            'learning_rate': 0.001,
            'value_weight': 0.5,
            'entropy_weight': 0.01,
            'clip_ratio': 0.2,
            'nstep': 1,
            'nstep_return': False,
            'adv_norm': True,
        },
        'collect': {
            'n_sample': 128,
            'unroll_len': 1,
            'discount_factor': 0.99,
            'gae_lambda': 0.95,
        },
    },
    # Logger toggles; every logger is on except ``return_logger``.
    'wandb_logger': {
        'gradient_logger': True,
        'video_logger': True,
        'plot_logger': True,
        'action_logger': True,
        'return_logger': False,
    },
})

# Re-export the env object from ding's gym_env module next to the config.
env = ding.envs.gym_env.env