Commit a8dfad63 authored by Yuxin Wu

match "steps"

parent b61a2d89
@@ -30,14 +30,15 @@ from expreplay import ExpReplay
 BATCH_SIZE = 64
 IMAGE_SIZE = (84, 84)
 FRAME_HISTORY = 4
-ACTION_REPEAT = 4
+ACTION_REPEAT = 4   # aka FRAME_SKIP
+UPDATE_FREQ = 4
 GAMMA = 0.99
 MEMORY_SIZE = 1e6
 # NOTE: will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
 INIT_MEMORY_SIZE = 5e4
-STEPS_PER_EPOCH = 10000
+STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10  # each epoch is 100k played frames
 EVAL_EPISODE = 50
 NUM_ACTIONS = None
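
The epoch length is now defined in gradient-update steps rather than raw iterations. A quick sanity check of the arithmetic (a minimal sketch using the constants above; the frame conversion is our reading of the "each epoch is 100k played frames" comment):

    UPDATE_FREQ = 4                               # one gradient update per 4 played frames
    STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10   # = 2500 * 10 = 25000 update steps

    # Each update step consumes UPDATE_FREQ new frames from the simulator,
    # so one epoch covers 25000 * 4 = 100000 played frames.
    assert STEPS_PER_EPOCH * UPDATE_FREQ == 100000
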
@@ -97,7 +98,7 @@ def get_config():
         memory_size=MEMORY_SIZE,
         init_memory_size=INIT_MEMORY_SIZE,
         init_exploration=1.0,
-        update_frequency=4,
+        update_frequency=UPDATE_FREQ,
         history_len=FRAME_HISTORY
     )
@@ -106,21 +107,24 @@ def get_config():
         callbacks=[
             ModelSaver(),
             ScheduledHyperParamSetter('learning_rate',
-                                      [(150, 4e-4), (250, 1e-4), (350, 5e-5)]),
+                                      [(60, 4e-4), (100, 2e-4)]),
             ScheduledHyperParamSetter(
                 ObjAttrParam(expreplay, 'exploration'),
-                [(0, 1), (100, 0.1), (200, 0.01)],
+                [(0, 1), (10, 0.1), (240, 0.01)],
                 interp='linear'),
-            RunOp(DQNModel.update_target_param),
+            PeriodicTrigger(
+                RunOp(DQNModel.update_target_param),
+                every_k_steps=10000 // UPDATE_FREQ),
             expreplay,
             PeriodicTrigger(Evaluator(
                 EVAL_EPISODE, ['state'], ['Qvalue'], get_player),
-                every_k_epochs=5),
-            # HumanHyperParamSetter('learning_rate', 'hyper.txt'),
+                every_k_epochs=10),
+            HumanHyperParamSetter('learning_rate'),
             # HumanHyperParamSetter(ObjAttrParam(expreplay, 'exploration'), 'hyper.txt'),
         ],
         model=M,
         steps_per_epoch=STEPS_PER_EPOCH,
+        max_epoch=3000,
         # run the simulator on a separate GPU if available
         predict_tower=[1] if get_nr_gpu() > 1 else [0],
     )
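
With the target-network update wrapped in a PeriodicTrigger, its interval is now also expressed in training steps, which is presumably what the commit title, match "steps", refers to. A rough check of what the trigger interval means in played frames (constant names from the diff; the conversion is our interpretation):

    UPDATE_FREQ = 4
    sync_every_steps = 10000 // UPDATE_FREQ    # 2500 gradient updates between syncs

    # 2500 update steps * 4 frames per step = 10000 played frames
    # between target-network refreshes.
    assert sync_every_steps * UPDATE_FREQ == 10000
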
...
@@ -147,7 +147,6 @@ class ExpReplay(DataFlow, Callback):
         self.rng = get_rng(self)
         self._init_memory_flag = threading.Event()  # tell if memory has been initialized
-        # TODO just use a semaphore?
         # a queue to receive notifications to populate memory
         self._populate_job_queue = queue.Queue(maxsize=5)
@@ -246,15 +245,15 @@ class ExpReplay(DataFlow, Callback):
         self._simulator_th.start()

     def _trigger_epoch(self):
-        # log player statistics
+        # log player statistics in training
         stats = self.player.stats
         for k, v in six.iteritems(stats):
             try:
                 mean, max = np.mean(v), np.max(v)
-                self.trainer.add_scalar_summary('expreplay/mean_' + k, mean)
-                self.trainer.add_scalar_summary('expreplay/max_' + k, max)
+                self.trainer.monitors.put_scalar('expreplay/mean_' + k, mean)
+                self.trainer.monitors.put_scalar('expreplay/max_' + k, max)
             except:
-                pass
+                logger.exception("Cannot log training scores.")
         self.player.reset_stat()
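
The logging call moves from the old add_scalar_summary helper to the trainer's monitors backend, and failures are now reported via logger.exception instead of being silently swallowed. A minimal standalone sketch of the same pattern (the callback class and stat source are hypothetical; put_scalar and logger are the tensorpack APIs used in the diff):

    import numpy as np
    from tensorpack.callbacks import Callback
    from tensorpack.utils import logger

    class PlayerStatLogger(Callback):
        """Hypothetical callback pushing per-epoch statistics to all monitors."""
        def __init__(self, get_stats):
            self._get_stats = get_stats   # callable returning {name: list of values}

        def _trigger_epoch(self):
            for k, v in self._get_stats().items():
                try:
                    # put_scalar forwards the value to every registered monitor
                    # (TensorBoard events, JSON stats, stdout, ...).
                    self.trainer.monitors.put_scalar('stats/mean_' + k, np.mean(v))
                except Exception:
                    logger.exception("Cannot log training scores.")
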
...
@@ -27,7 +27,7 @@ class RunOp(Callback):
         Examples:
             The `DQN Example
-            <https://github.com/ppwwyyxx/tensorpack/blob/master/examples/Atari2600/DQN.py#L182>`_
+            <https://github.com/ppwwyyxx/tensorpack/blob/master/examples/DeepQNetwork/>`_
             uses this callback to update target network.
         """
         self.setup_func = setup_func
...