ding.worker.coordinator.base_serial_commander¶
ding.worker.coordinator.base_serial_commander
¶
BaseSerialCommander
¶
Bases: object
Overview
Base serial commander class.
Interface: __init__, step. Property: policy.
__init__(cfg, learner, collector, evaluator, replay_buffer, policy=None)
¶
Overview
Init the BaseSerialCommander
Arguments:
- cfg (:obj:dict): the config of commander
- learner (:obj:BaseLearner): the learner
- collector (:obj:BaseSerialCollector): the collector
- evaluator (:obj:InteractionSerialEvaluator): the evaluator
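- replay_buffer (:obj:IBuffer): the buffer
- policy (:obj:namedtuple, optional): the policy; when given, it is stored as the commander's policy property. Defaults to None.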
step()
¶
Overview
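Step the commander: merge the learner's latest learn_info and the collector's current envstep into the commander's info, then return the collect settings produced by the policy's get_setting_collect for that info.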
Full Source Code
../ding/worker/coordinator/base_serial_commander.py
from collections import namedtuple
from easydict import EasyDict
import copy


class BaseSerialCommander(object):
    r"""
    Overview:
        Base serial commander class. It holds references to the learner, collector, evaluator and
        replay buffer it is constructed with, accumulates their latest information in an internal
        info dict, and asks the policy for the collect settings derived from that info.
    Interface:
        __init__, step
    Property:
        policy
    """

    @classmethod
    def default_config(cls: type) -> EasyDict:
        r"""
        Overview:
            Build the default config of this commander class.
        Returns:
            - cfg (:obj:`EasyDict`): a deep copy of the class attribute ``config`` with an extra \
                ``cfg_type`` field set to the class name followed by ``'Dict'``.
        """
        # Deep copy so that callers mutating the returned config never touch the class-level default.
        cfg = EasyDict(copy.deepcopy(cls.config))
        cfg.cfg_type = cls.__name__ + 'Dict'
        return cfg

    # Class-level default config; read by ``default_config``.
    config = {}

    def __init__(
            self,
            cfg: dict,
            learner: 'BaseLearner',  # noqa
            collector: 'BaseSerialCollector',  # noqa
            evaluator: 'InteractionSerialEvaluator',  # noqa
            replay_buffer: 'IBuffer',  # noqa
            policy: namedtuple = None,
    ) -> None:
        r"""
        Overview:
            Init the BaseSerialCommander
        Arguments:
            - cfg (:obj:`dict`): the config of commander
            - learner (:obj:`BaseLearner`): the learner
            - collector (:obj:`BaseSerialCollector`): the collector
            - evaluator (:obj:`InteractionSerialEvaluator`): the evaluator
            - replay_buffer (:obj:`IBuffer`): the buffer
            - policy (:obj:`namedtuple`, optional): the policy queried in ``step``; when omitted, \
                it must be assigned through the ``policy`` property before ``step`` is called.
        """
        self._cfg = cfg
        self._learner = learner
        self._collector = collector
        self._evaluator = evaluator
        self._replay_buffer = replay_buffer
        # Accumulated learner/collector information, refreshed on every ``step``.
        self._info = {}
        # Always define the attribute so that reading ``policy`` before it is assigned yields None
        # instead of raising AttributeError on a missing ``_policy``.
        self._policy = None
        if policy is not None:
            # Go through the property setter so any setter override is honoured.
            self.policy = policy

    def step(self) -> dict:
        r"""
        Overview:
            Step the commander: merge the learner's ``learn_info`` and the collector's ``envstep``
            into the internal info dict, then query the policy for the collect settings.
        Returns:
            - collect_kwargs (:obj:`dict`): the value returned by \
                ``policy.get_setting_collect(info)``; presumably a dict of collect keyword \
                arguments - confirm against the policy implementation.
        """
        # Update info
        learn_info = self._learner.learn_info
        collector_info = {'envstep': self._collector.envstep}
        self._info.update(learn_info)
        self._info.update(collector_info)
        # update kwargs
        collect_kwargs = self._policy.get_setting_collect(self._info)
        return collect_kwargs

    @property
    def policy(self) -> 'Policy':  # noqa
        # The policy currently used by ``step``; None until one has been assigned.
        return self._policy

    @policy.setter
    def policy(self, _policy: 'Policy') -> None:  # noqa
        self._policy = _policy