Skip to content

ding.league.one_vs_one_league

ding.league.one_vs_one_league

OneVsOneLeague

Bases: BaseLeague

Overview

One vs One battle game league. Decide which two players will play against each other.

Interface: `__init__`, `run`, `close`, `finish_job`, `update_active_player`

Full Source Code

../ding/league/one_vs_one_league.py

from easydict import EasyDict
from typing import Optional

from ding.utils import LEAGUE_REGISTRY
from .base_league import BaseLeague
from .player import ActivePlayer


@LEAGUE_REGISTRY.register('one_vs_one')
class OneVsOneLeague(BaseLeague):
    """
    Overview:
        League for one-vs-one battle games. Its job is to decide which two players are matched
        against each other.
    Interface:
        __init__, run, close, finish_job, update_active_player
    """
    config = {
        'league_type': 'one_vs_one',
        'import_names': ["ding.league"],
        # ---player----
        # "player_category" is just a name and depends on the env.
        # For example, in StarCraft, this can be ['zerg', 'terran', 'protoss'].
        'player_category': ['default'],
        # Support different types of active players for solo and battle league.
        # For solo league, supports ['solo_active_player'].
        # For battle league, supports ['battle_active_player', 'main_player', 'main_exploiter', 'league_exploiter'].
        'active_players': {
            'naive_sp_player': 1,  # {player_type: player_num}
        },
        'naive_sp_player': {
            # There should be keys ['one_phase_step', 'branch_probs', 'strong_win_rate'].
            # Specifically for 'main_exploiter' of StarCraft, there should be an additional key
            # ['min_valid_win_rate'].
            'one_phase_step': 10,
            'branch_probs': {
                'pfsp': 0.5,
                'sp': 0.5,
            },
            'strong_win_rate': 0.7,
        },
        # "use_pretrain" means whether to use pretrain model to initialize active player.
        'use_pretrain': False,
        # "use_pretrain_init_historical" means whether to use pretrain model to initialize historical player.
        'use_pretrain_init_historical': False,
        # "pretrain_checkpoint_path" is the pretrain checkpoint path used in "use_pretrain" and
        # "use_pretrain_init_historical". If both are False, "pretrain_checkpoint_path" can be omitted as well.
        # Otherwise, "pretrain_checkpoint_path" should list paths of all player categories.
        'pretrain_checkpoint_path': {
            'default': 'default_cate_pretrain.pth',
        },
        # ---payoff---
        'payoff': {
            # Supports ['battle']
            'type': 'battle',
            'decay': 0.99,
            'min_win_rate_games': 8,
        },
        'metric': {
            'mu': 0,
            'sigma': 25 / 3,
            'beta': 25 / 3 / 2,
            'tau': 0.0,
            'draw_probability': 0.02,
        },
    }

    # override
    def _get_job_info(self, player: ActivePlayer, eval_flag: bool = False) -> dict:
        """
        Overview:
            Build the job description for ``player``, called by ``_launch_job``.
        Arguments:
            - player (:obj:`ActivePlayer`): The active player that will be assigned a job.
            - eval_flag (:obj:`bool`): Forwarded to ``player.get_job``; selects between the \
                single-agent evaluation job and the two-agent battle job.
        Returns:
            - job_info (:obj:`dict`): Always holds 'agent_num', 'launch_player', 'player_id', \
                'checkpoint_path' and 'player_active_flag'. An evaluation job additionally holds \
                'eval_opponent' and lists only ``player``; a battle job lists ``player`` followed \
                by the opponent taken from the job.
        """
        assert isinstance(player, ActivePlayer), player.__class__
        job = EasyDict(player.get_job(eval_flag))

        if not eval_flag:
            # Battle job: the launching player always comes first, then its opponent.
            participants = [player, job.opponent]
            return {
                'agent_num': 2,
                'launch_player': player.player_id,
                'player_id': [p.player_id for p in participants],
                'checkpoint_path': [p.checkpoint_path for p in participants],
                'player_active_flag': [isinstance(p, ActivePlayer) for p in participants],
            }

        # Evaluation job: only the launching player is an agent; the opponent is passed through.
        return {
            'agent_num': 1,
            'launch_player': player.player_id,
            'player_id': [player.player_id],
            'checkpoint_path': [player.checkpoint_path],
            'player_active_flag': [isinstance(player, ActivePlayer)],
            'eval_opponent': job.opponent,
        }

    # override
    def _mutate_player(self, player: ActivePlayer):
        """
        Overview:
            Mutation hook, described upstream as giving players a probability of being reset to \
            supervised learning model parameters. This league performs no mutation: the body is empty.
        Arguments:
            - player (:obj:`ActivePlayer`): The active player that may mutate.
        """

    # override
    def _update_player(self, player: ActivePlayer, player_info: dict) -> Optional[bool]:
        """
        Overview:
            Update an active player, called by ``self.update_active_player``.
        Arguments:
            - player (:obj:`ActivePlayer`): The active player that will be updated.
            - player_info (:obj:`dict`): An info dict of the active player which is to be updated. \
                A 'train_iteration' key marks learner info; otherwise an 'eval_win' key marks \
                evaluator info.
        Returns:
            - increment_eval_difficulty (:obj:`bool`): Only meaningful when the evaluator calls this method. \
                For evaluator info with a truthy 'eval_win', this is whatever \
                ``player.increment_eval_difficulty()`` returns; for learner info or a falsy 'eval_win' \
                it is False. None is returned when ``player_info`` has neither key.
        """
        assert isinstance(player, ActivePlayer)

        if 'train_iteration' in player_info:
            # Update info from learner
            player.total_agent_step = player_info['train_iteration']
            return False

        if 'eval_win' not in player_info:
            # Neither learner nor evaluator info: nothing to update.
            return None

        if not player_info['eval_win']:
            return False

        # Update info from evaluator
        return player.increment_eval_difficulty()