Commit 1f94ae78 authored by Yuxin Wu

notes and docs

parent 38d02e34
@@ -36,7 +36,7 @@
 UPDATE_FREQ = 4
 GAMMA = 0.99
 MEMORY_SIZE = 1e6
-# NOTE: will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
+# will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
 INIT_MEMORY_SIZE = 5e4
 STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10  # each epoch is 100k played frames
 EVAL_EPISODE = 50
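
The two comments in this hunk encode some arithmetic worth spelling out. A quick sanity check, assuming one uint8 84x84 frame per stored state (which is what the one-byte-per-pixel figure implies):

```python
# Sanity check of the numbers in the comments above; assumes one uint8
# 84x84 frame per stored state, i.e. one byte per pixel.
MEMORY_SIZE = 1e6
UPDATE_FREQ = 4

replay_bytes = MEMORY_SIZE * 84 * 84
print(replay_bytes / 1024 ** 3)              # ~6.57 GiB, the "6.6G memory" above

STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10  # 25000 training steps
print(STEPS_PER_EPOCH * UPDATE_FREQ)         # 100000 played frames per epoch
```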
@@ -70,11 +70,12 @@ class Model(DQNModel):
         with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True), \
                 argscope(LeakyReLU, alpha=0.01):
             l = (LinearWrap(image)
-                 # the original arch is 2x faster
+                 # Nature architecture
                  .Conv2D('conv0', out_channel=32, kernel_shape=8, stride=4)
                  .Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
                  .Conv2D('conv2', out_channel=64, kernel_shape=3)
+                 # architecture used for the figure in the README, slower but takes fewer iterations to converge
                  # .Conv2D('conv0', out_channel=32, kernel_shape=5)
                  # .MaxPooling('pool0', 2)
                  # .Conv2D('conv1', out_channel=32, kernel_shape=5)
@@ -112,25 +113,24 @@ def get_config():
         dataflow=expreplay,
         callbacks=[
             ModelSaver(),
+            PeriodicTrigger(
+                RunOp(DQNModel.update_target_param),
+                every_k_steps=10000 // UPDATE_FREQ),  # update target network every 10k steps
+            expreplay,
             ScheduledHyperParamSetter('learning_rate',
                                       [(60, 4e-4), (100, 2e-4)]),
             ScheduledHyperParamSetter(
                 ObjAttrParam(expreplay, 'exploration'),
-                [(0, 1), (10, 0.1), (320, 0.01)],
+                [(0, 1), (10, 0.1), (320, 0.01)],  # 1->0.1 in the first million steps
                 interp='linear'),
-            PeriodicTrigger(
-                RunOp(DQNModel.update_target_param),
-                every_k_steps=10000 // UPDATE_FREQ),
-            expreplay,
             PeriodicTrigger(Evaluator(
                 EVAL_EPISODE, ['state'], ['Qvalue'], get_player),
                 every_k_epochs=10),
             HumanHyperParamSetter('learning_rate'),
-            # HumanHyperParamSetter(ObjAttrParam(expreplay, 'exploration'), 'hyper.txt'),
         ],
         model=M,
         steps_per_epoch=STEPS_PER_EPOCH,
-        max_epoch=3000,
+        max_epoch=1000,
         # run the simulator on a separate GPU if available
         predict_tower=[1] if get_nr_gpu() > 1 else [0],
     )
...
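
The exploration schedule in this hunk decays epsilon from 1 to 0.1 over the first 10 epochs and on to 0.01 by epoch 320, with linear interpolation between the listed points; with STEPS_PER_EPOCH = 25000 steps and UPDATE_FREQ = 4, 10 epochs correspond to exactly the first million played frames, which is what the new comment refers to. A minimal sketch of the schedule itself (an illustration only, not tensorpack's implementation):

```python
# Sketch of the linear schedule set on expreplay.exploration above:
# epsilon goes 1 -> 0.1 over the first 10 epochs, then 0.1 -> 0.01 by epoch 320.
# Illustration only, not tensorpack's implementation.
SCHEDULE = [(0, 1.0), (10, 0.1), (320, 0.01)]

def exploration_at(epoch):
    for (e0, v0), (e1, v1) in zip(SCHEDULE, SCHEDULE[1:]):
        if e0 <= epoch <= e1:
            frac = (epoch - e0) / float(e1 - e0)
            return v0 + frac * (v1 - v0)
    return SCHEDULE[-1][1]   # keep the final value after the last point

print(exploration_at(5))     # 0.55, halfway through the first decay
print(exploration_at(320))   # 0.01
```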
@@ -19,13 +19,10 @@ Claimed performance in the paper can be reproduced, on several games I've tested
 ![DQN](curve-breakout.png)

-DQN typically took 1 day of training to reach a score of 400 on breakout game (same as the paper).
-My Batch-A3C implementation only took <2 hours.
-Both were trained on one GPU with an extra GPU for simulation.
+On one TitanX, Double-DQN took 1 day of training to reach a score of 400 on breakout game.
+Batch-A3C implementation only took <2 hours. (Both are trained with a larger network noted in the code).

-Double-DQN runs at 18 batches/s (1152 frames/s) on TitanX.
-Note that I wasn't using the network architecture in the paper.
-If switched to the network in the paper it could run 2x faster.
+Double-DQN runs at 60 batches (3840 trained frames, 240 seen frames, 960 game frames) per second on TitanX.

 ## How to use
...
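
The three frame counts in the new throughput sentence are consistent with the training setup; a back-of-the-envelope check, where the batch size of 64 and the frame skip of 4 are assumptions about this example rather than something stated in the diff:

```python
# Back-of-the-envelope check of the README throughput numbers.
# BATCH_SIZE = 64 and FRAME_SKIP = 4 are assumptions, not shown in this diff.
BATCH_SIZE = 64
UPDATE_FREQ = 4   # one training batch every 4 environment steps
FRAME_SKIP = 4    # each action is repeated for 4 game frames

batches_per_sec = 60
print(batches_per_sec * BATCH_SIZE)                 # 3840 trained frames / s
print(batches_per_sec * UPDATE_FREQ)                # 240 seen frames / s
print(batches_per_sec * UPDATE_FREQ * FRAME_SKIP)   # 960 game frames / s
```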
@@ -217,8 +217,9 @@ class ScheduledHyperParamSetter(HyperParamSetter):
         param: same as in :class:`HyperParamSetter`.
         schedule (list): with the format ``[(epoch1, val1), (epoch2, val2), (epoch3, val3)]``.
             Each ``(ep, val)`` pair means to set the param
-            to "val" __after__ the completion of `ep` th epoch.
-            If ep == 0, the value will be set before the first epoch.
+            to "val" __after__ the completion of epoch `ep`.
+            If ep == 0, the value will be set before the first epoch
+            (by default the first is epoch 1).
         interp: None: no interpolation. 'linear': linear interpolation

     Example:
...
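
A minimal usage sketch of the schedule format documented here, reusing the learning-rate schedule from the DQN config in this same commit (the import path is an assumption about the package layout):

```python
# Minimal usage sketch of the schedule format documented above, reusing the
# learning-rate schedule from the DQN config in this commit.
# The import path is an assumption about the package layout.
from tensorpack.callbacks import ScheduledHyperParamSetter

lr_setter = ScheduledHyperParamSetter(
    'learning_rate',
    [(60, 4e-4), (100, 2e-4)])   # each value takes effect after the listed epoch
```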