ding.bonus.config — full source code

Source file: ../ding/bonus/config.py

from easydict import EasyDict
import os
import gym
from ding.envs import BaseEnv, DingEnvWrapper
from ding.envs.env_wrappers import MaxAndSkipWrapper, WarpFrameWrapper, ScaledFloatFrameWrapper, FrameStackWrapper, \
    EvalEpisodeReturnWrapper, TransposeWrapper, TimeLimitWrapper, FlatObsWrapper, GymToGymnasiumWrapper
from ding.policy import PPOFPolicy


def get_instance_config(env_id: str, algorithm: str) -> EasyDict:
    """
    Return a per-environment tuned policy config for the given algorithm.

    Starts from the algorithm's default config and overrides hyperparameters
    (sample size, batch size, learning rate, model sizes, ...) for the
    specific ``env_id``.

    Arguments:
        - env_id: environment identifier (gym id or custom short name).
        - algorithm: algorithm name; only ``'PPOF'`` is supported here.
    Returns:
        - cfg: an ``EasyDict`` policy configuration.
    Raises:
        - KeyError: if ``env_id`` or ``algorithm`` is not supported.
    """
    if algorithm == 'PPOF':
        cfg = PPOFPolicy.default_config()
        if env_id == 'LunarLander-v2':
            cfg.n_sample = 512
            cfg.value_norm = 'popart'
            cfg.entropy_weight = 1e-3
        elif env_id == 'LunarLanderContinuous-v2':
            cfg.action_space = 'continuous'
            cfg.n_sample = 400
        elif env_id == 'BipedalWalker-v3':
            cfg.learning_rate = 1e-3
            cfg.action_space = 'continuous'
            cfg.n_sample = 1024
        elif env_id == 'Pendulum-v1':
            cfg.action_space = 'continuous'
            cfg.n_sample = 400
        elif env_id == 'acrobot':
            cfg.learning_rate = 1e-4
            cfg.n_sample = 400
        elif env_id == 'rocket_landing':
            cfg.n_sample = 2048
            cfg.adv_norm = False
            cfg.model = dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            )
        elif env_id == 'drone_fly':
            cfg.action_space = 'continuous'
            cfg.adv_norm = False
            cfg.epoch_per_collect = 5
            cfg.learning_rate = 5e-5
            cfg.n_sample = 640
        elif env_id == 'hybrid_moving':
            cfg.action_space = 'hybrid'
            cfg.n_sample = 3200
            cfg.entropy_weight = 0.03
            cfg.batch_size = 320
            cfg.adv_norm = False
            cfg.model = dict(
                encoder_hidden_size_list=[256, 128, 64, 64],
                sigma_type='fixed',
                fixed_sigma_value=0.3,
                bound_type='tanh',
            )
        elif env_id == 'evogym_carrier':
            cfg.action_space = 'continuous'
            cfg.n_sample = 2048
            cfg.batch_size = 256
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 3e-3
        elif env_id == 'mario':
            cfg.n_sample = 256
            cfg.batch_size = 64
            cfg.epoch_per_collect = 2
            cfg.learning_rate = 1e-3
            cfg.model = dict(
                encoder_hidden_size_list=[64, 64, 128],
                critic_head_hidden_size=128,
                actor_head_hidden_size=128,
            )
        elif env_id == 'di_sheep':
            cfg.n_sample = 3200
            cfg.batch_size = 320
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 3e-4
            cfg.adv_norm = False
            cfg.entropy_weight = 0.001
        elif env_id == 'procgen_bigfish':
            cfg.n_sample = 16384
            cfg.batch_size = 16384
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 5e-4
            cfg.model = dict(
                encoder_hidden_size_list=[64, 128, 256],
                critic_head_hidden_size=256,
                actor_head_hidden_size=256,
            )
        elif env_id in ['KangarooNoFrameskip-v4', 'BowlingNoFrameskip-v4']:
            cfg.n_sample = 1024
            cfg.batch_size = 128
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 0.0001
            cfg.model = dict(
                encoder_hidden_size_list=[32, 64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
                critic_head_layer_num=2,
            )
        elif env_id == 'PongNoFrameskip-v4':
            cfg.n_sample = 3200
            cfg.batch_size = 320
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 3e-4
            cfg.model = dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            )
        elif env_id == 'SpaceInvadersNoFrameskip-v4':
            cfg.n_sample = 320
            cfg.batch_size = 320
            cfg.epoch_per_collect = 1
            cfg.learning_rate = 1e-3
            cfg.entropy_weight = 0.01
            cfg.lr_scheduler = (2000, 0.1)
            cfg.model = dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            )
        elif env_id == 'QbertNoFrameskip-v4':
            cfg.n_sample = 3200
            cfg.batch_size = 320
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 5e-4
            cfg.lr_scheduler = (1000, 0.1)
            cfg.model = dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            )
        elif env_id == 'minigrid_fourroom':
            cfg.n_sample = 3200
            cfg.batch_size = 320
            cfg.learning_rate = 3e-4
            cfg.epoch_per_collect = 10
            cfg.entropy_weight = 0.001
        elif env_id == 'metadrive':
            cfg.action_space = 'continuous'
            cfg.entropy_weight = 0.001
            cfg.n_sample = 3000
            cfg.epoch_per_collect = 10
            # FIX: the original assigned learning_rate twice (3e-4 then 0.0001);
            # only the final value took effect, so the dead first assignment is removed.
            cfg.learning_rate = 0.0001
            cfg.model = dict(
                encoder_hidden_size_list=[32, 64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
                critic_head_layer_num=2,
            )
        elif env_id == 'Hopper-v3':
            cfg.action_space = "continuous"
            cfg.n_sample = 3200
            cfg.batch_size = 320
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 3e-4
        elif env_id == 'HalfCheetah-v3':
            cfg.action_space = "continuous"
            cfg.n_sample = 3200
            cfg.batch_size = 320
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 3e-4
        elif env_id == 'Walker2d-v3':
            cfg.action_space = "continuous"
            cfg.n_sample = 3200
            cfg.batch_size = 320
            cfg.epoch_per_collect = 10
            cfg.learning_rate = 3e-4
        else:
            raise KeyError("not supported env type: {}".format(env_id))
    else:
        raise KeyError("not supported algorithm type: {}".format(algorithm))

    return cfg


def get_instance_env(env_id: str) -> BaseEnv:
    """
    Construct and return a ready-to-use environment instance for ``env_id``.

    Most environments are wrapped in ``DingEnvWrapper``; several custom
    environments (rocket, drones, metadrive, ...) are imported lazily from
    ``dizoo`` so the optional dependencies are only required when used.

    Arguments:
        - env_id: environment identifier (gym id or custom short name).
    Returns:
        - env: a ``BaseEnv``-compatible environment instance.
    Raises:
        - KeyError: if ``env_id`` is not supported.
    """
    if env_id == 'LunarLander-v2':
        return DingEnvWrapper(gym.make('LunarLander-v2'))
    elif env_id == 'LunarLanderContinuous-v2':
        return DingEnvWrapper(gym.make('LunarLanderContinuous-v2', continuous=True))
    elif env_id == 'BipedalWalker-v3':
        return DingEnvWrapper(gym.make('BipedalWalker-v3'), cfg={'act_scale': True, 'rew_clip': True})
    elif env_id == 'Pendulum-v1':
        return DingEnvWrapper(gym.make('Pendulum-v1'), cfg={'act_scale': True})
    elif env_id == 'acrobot':
        return DingEnvWrapper(gym.make('Acrobot-v1'))
    elif env_id == 'rocket_landing':
        from dizoo.rocket.envs import RocketEnv
        cfg = EasyDict({
            'task': 'landing',
            'max_steps': 800,
        })
        return RocketEnv(cfg)
    elif env_id == 'drone_fly':
        from dizoo.gym_pybullet_drones.envs import GymPybulletDronesEnv
        cfg = EasyDict({
            'env_id': 'flythrugate-aviary-v0',
            'action_type': 'VEL',
        })
        return GymPybulletDronesEnv(cfg)
    elif env_id == 'hybrid_moving':
        import gym_hybrid
        return DingEnvWrapper(gym.make('Moving-v0'))
    elif env_id == 'evogym_carrier':
        import evogym.envs
        # FIX: dropped unused `sample_robot` from this import; only WorldObject is used.
        from evogym import WorldObject
        path = os.path.join(os.path.dirname(__file__), '../../dizoo/evogym/envs/world_data/carry_bot.json')
        robot_object = WorldObject.from_json(path)
        body = robot_object.get_structure()
        return DingEnvWrapper(
            gym.make('Carrier-v0', body=body),
            cfg={
                'env_wrapper': [
                    lambda env: TimeLimitWrapper(env, max_limit=300),
                    lambda env: EvalEpisodeReturnWrapper(env),
                ]
            }
        )
    elif env_id == 'mario':
        import gym_super_mario_bros
        from nes_py.wrappers import JoypadSpace
        return DingEnvWrapper(
            JoypadSpace(gym_super_mario_bros.make("SuperMarioBros-1-1-v1"), [["right"], ["right", "A"]]),
            cfg={
                'env_wrapper': [
                    lambda env: MaxAndSkipWrapper(env, skip=4),
                    lambda env: WarpFrameWrapper(env, size=84),
                    lambda env: ScaledFloatFrameWrapper(env),
                    lambda env: FrameStackWrapper(env, n_frames=4),
                    lambda env: TimeLimitWrapper(env, max_limit=200),
                    lambda env: EvalEpisodeReturnWrapper(env),
                ]
            }
        )
    elif env_id == 'di_sheep':
        from sheep_env import SheepEnv
        return DingEnvWrapper(SheepEnv(level=9))
    elif env_id == 'procgen_bigfish':
        return DingEnvWrapper(
            gym.make('procgen:procgen-bigfish-v0', start_level=0, num_levels=1),
            cfg={
                'env_wrapper': [
                    lambda env: TransposeWrapper(env),
                    lambda env: ScaledFloatFrameWrapper(env),
                    lambda env: EvalEpisodeReturnWrapper(env),
                ]
            },
            seed_api=False,
        )
    elif env_id == 'Hopper-v3':
        cfg = EasyDict(
            env_id='Hopper-v3',
            env_wrapper='mujoco_default',
            act_scale=True,
            rew_clip=True,
        )
        return DingEnvWrapper(gym.make('Hopper-v3'), cfg=cfg)
    elif env_id == 'HalfCheetah-v3':
        cfg = EasyDict(
            env_id='HalfCheetah-v3',
            env_wrapper='mujoco_default',
            act_scale=True,
            rew_clip=True,
        )
        return DingEnvWrapper(gym.make('HalfCheetah-v3'), cfg=cfg)
    elif env_id == 'Walker2d-v3':
        cfg = EasyDict(
            env_id='Walker2d-v3',
            env_wrapper='mujoco_default',
            act_scale=True,
            rew_clip=True,
        )
        return DingEnvWrapper(gym.make('Walker2d-v3'), cfg=cfg)
    elif env_id in [
            'BowlingNoFrameskip-v4',
            'BreakoutNoFrameskip-v4',
            # FIX: the original was missing the comma after 'GopherNoFrameskip-v4',
            # so implicit string concatenation produced the bogus id
            # 'GopherNoFrameskip-v4KangarooNoFrameskip-v4' and neither env matched.
            'GopherNoFrameskip-v4',
            'KangarooNoFrameskip-v4',
            'PongNoFrameskip-v4',
            'QbertNoFrameskip-v4',
            'SpaceInvadersNoFrameskip-v4',
    ]:
        cfg = EasyDict({
            'env_id': env_id,
            'env_wrapper': 'atari_default',
        })
        ding_env_atari = DingEnvWrapper(gym.make(env_id), cfg=cfg)
        return ding_env_atari
    elif env_id == 'minigrid_fourroom':
        import gymnasium
        return DingEnvWrapper(
            gymnasium.make('MiniGrid-FourRooms-v0'),
            cfg={
                'env_wrapper': [
                    lambda env: GymToGymnasiumWrapper(env),
                    lambda env: FlatObsWrapper(env),
                    lambda env: TimeLimitWrapper(env, max_limit=300),
                    lambda env: EvalEpisodeReturnWrapper(env),
                ]
            }
        )
    elif env_id == 'metadrive':
        from dizoo.metadrive.env.drive_env import MetaDrivePPOOriginEnv
        from dizoo.metadrive.env.drive_wrapper import DriveEnvWrapper
        cfg = dict(
            map='XSOS',
            horizon=4000,
            out_of_road_penalty=40.0,
            crash_vehicle_penalty=40.0,
            out_of_route_done=True,
        )
        cfg = EasyDict(cfg)
        return DriveEnvWrapper(MetaDrivePPOOriginEnv(cfg))
    else:
        raise KeyError("not supported env type: {}".format(env_id))


def get_hybrid_shape(action_space) -> EasyDict:
    """
    Extract the discrete/continuous shape pair from a hybrid action space.

    Arguments:
        - action_space: a tuple-like space whose first element is a discrete
          space (``.n``) and second element a continuous space (``.shape``).
    Returns:
        - shape: ``EasyDict`` with ``action_type_shape`` and ``action_args_shape``.
    """
    return EasyDict({
        'action_type_shape': action_space[0].n,
        'action_args_shape': action_space[1].shape,
    })