Commit 00f83c13 authored by Yuxin Wu

Update DQN models

parent 81127236
@@ -20,12 +20,12 @@ from expreplay import ExpReplay
 BATCH_SIZE = 64
 IMAGE_SIZE = (84, 84)
 FRAME_HISTORY = 4
-UPDATE_FREQ = 4
+UPDATE_FREQ = 4  # the number of new state transitions per parameter update (per training step)
 MEMORY_SIZE = 1e6
 # will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
 INIT_MEMORY_SIZE = MEMORY_SIZE // 20
-STEPS_PER_EPOCH = 100000 // UPDATE_FREQ  # each epoch is 100k played frames
+STEPS_PER_EPOCH = 100000 // UPDATE_FREQ  # each epoch is 100k state transitions
 EVAL_EPISODE = 50
 NUM_PARALLEL_PLAYERS = 3
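
As a quick sanity check on these constants (a standalone sketch, not code from the repository): the replay buffer stores MEMORY_SIZE single 84x84 uint8 frames, and each training step collects UPDATE_FREQ new transitions.

```python
# Standalone arithmetic check of the constants above (illustrative only).
MEMORY_SIZE = int(1e6)
IMAGE_SIZE = (84, 84)
UPDATE_FREQ = 4

# Replay memory: one uint8 byte per pixel per stored frame.
replay_bytes = MEMORY_SIZE * IMAGE_SIZE[0] * IMAGE_SIZE[1]
print(round(replay_bytes / 1024 ** 3, 1))  # ~6.6 (GiB), matching the comment

# One training step = one parameter update over UPDATE_FREQ new transitions,
# so an epoch of 100000 // UPDATE_FREQ steps covers 100k transitions.
STEPS_PER_EPOCH = 100000 // UPDATE_FREQ
print(STEPS_PER_EPOCH, STEPS_PER_EPOCH * UPDATE_FREQ)  # 25000 100000
```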
@@ -46,7 +46,6 @@ def get_player(viz=False, train=False):
         env = gym.make(ENV_NAME)
     else:
         from atari import AtariPlayer
-        # frame_skip=4 is what's used in the original paper
         env = AtariPlayer(ENV_NAME, frame_skip=4, viz=viz,
                           live_lost_as_eoe=train, max_num_frames=60000)
     env = FireResetEnv(env)
@@ -127,21 +126,20 @@ def get_config(model):
             ModelSaver(),
             PeriodicTrigger(
                 RunOp(DQNModel.update_target_param, verbose=True),
-                every_k_steps=10000 // UPDATE_FREQ),    # update target network every 10k steps
+                every_k_steps=5000),    # update target network every 5k steps
             expreplay,
             ScheduledHyperParamSetter('learning_rate',
-                                      [(0, 1e-3), (60, 4e-4), (100, 2e-4), (500, 5e-5)]),
+                                      [(0, 1e-3), (60, 5e-4), (400, 1e-4)]),
             ScheduledHyperParamSetter(
                 ObjAttrParam(expreplay, 'exploration'),
-                [(0, 1), (10, 0.1), (320, 0.01)],   # 1->0.1 in the first million steps
+                [(0, 1), (10, 0.1), (400, 0.01)],   # 1->0.1 in the first million steps
                 interp='linear'),
             PeriodicTrigger(Evaluator(
                 EVAL_EPISODE, ['state'], ['Qvalue'], get_player),
                 every_k_epochs=5 if 'pong' in args.env.lower() else 10),  # eval more frequently for easy games
-            HumanHyperParamSetter('learning_rate'),
         ],
         steps_per_epoch=STEPS_PER_EPOCH,
-        max_epoch=800,
+        max_epoch=500,  # a total of 50M state transitions
     )
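
The epoch indices in these schedules are easiest to read in units of state transitions; a small illustrative calculation using the constants from the first hunk (not code from the repository):

```python
# Mapping schedule epochs to state transitions (illustrative arithmetic only).
UPDATE_FREQ = 4
STEPS_PER_EPOCH = 100000 // UPDATE_FREQ                # 25000 parameter updates per epoch
TRANSITIONS_PER_EPOCH = STEPS_PER_EPOCH * UPDATE_FREQ  # 100k new transitions per epoch

# Exploration anneals from 1 to 0.1 over the first 10 epochs:
print(10 * TRANSITIONS_PER_EPOCH)   # 1000000 -> "the first million steps"

# Training stops at max_epoch=500:
print(500 * TRANSITIONS_PER_EPOCH)  # 50000000 -> a total of 50M state transitions
```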
@@ -100,7 +100,8 @@ class Model(ModelDesc):
     def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
-        opt = tf.train.RMSPropOptimizer(lr, epsilon=1e-5)
+        tf.summary.scalar("learning_rate", lr)
+        opt = tf.train.RMSPropOptimizer(lr, decay=0.95, momentum=0.95, epsilon=1e-2)
         return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])

     @staticmethod
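
Read outside the diff context, the updated optimizer amounts to the following (a minimal TF 1.x sketch; the decay, momentum, and epsilon values are taken from the new lines above, and tensorpack's gradient summary wrapper is omitted):

```python
import tensorflow as tf  # TF 1.x API, as used by this example


def optimizer():
    # Non-trainable variable so a callback can adjust the learning rate,
    # now also logged to TensorBoard.
    lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
    tf.summary.scalar("learning_rate", lr)
    # RMSProp with the settings introduced by this commit.
    return tf.train.RMSPropOptimizer(lr, decay=0.95, momentum=0.95, epsilon=1e-2)
```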
@@ -15,26 +15,14 @@ Reproduce (performance of) the following reinforcement learning methods:
 + A3C in [Asynchronous Methods for Deep Reinforcement Learning](http://arxiv.org/abs/1602.01783). (I
   used a modified version where each batch contains transitions from different simulators, which I called "Batch-A3C".)

-## Performance & Speed
-
-Claimed performance in the paper can be reproduced, on several games I've tested with.
-
-![DQN](curve-breakout.png)
-
-On one GTX 1080Ti,
-the ALE version took
-__~2 hours__ of training to reach 21 (maximum) score on Pong,
-__~10 hours__ of training to reach 400 score on Breakout.
-
-It runs at 100 batches (6.4k trained frames, 400 seen frames, 1.6k game frames) per second on GTX 1080Ti.
-This is likely the fastest open source TF implementation of DQN.
-
-## How to use
+## Usage:

 ### With ALE (paper's setting):

 Install [ALE](https://github.com/mgbellemare/Arcade-Learning-Environment) and gym.

-Download an [atari rom](https://github.com/openai/atari-py/tree/master/atari_py/atari_roms), e.g.:
+Download an [atari rom](https://github.com/openai/atari-py/tree/gdb/atari_py/atari_roms), e.g.:
 ```
-wget https://github.com/openai/atari-py/raw/master/atari_py/atari_roms/breakout.bin
+wget https://github.com/openai/atari-py/raw/gdb/atari_py/atari_roms/breakout.bin
 ```

 Start Training:

@@ -46,16 +34,35 @@ Start Training:
 Watch the agent play:
 ```
 # Download pretrained models or use one you trained:
-wget http://models.tensorpack.com/DeepQNetwork/DoubleDQN-Breakout.npz
-./DQN.py --env breakout.bin --task play --load DoubleDQN-Breakout.npz
+wget http://models.tensorpack.com/DeepQNetwork/DoubleDQN-breakout.bin.npz
+./DQN.py --env breakout.bin --task play --load DoubleDQN-breakout.bin.npz
 ```

+Evaluation of 50 episodes:
+```
+./DQN.py --env breakout.bin --task eval --load DoubleDQN-breakout.bin.npz
+```
+
 ### With gym's Atari:

-Install gym and atari_py.
-```
-./DQN.py --env BreakoutDeterministic-v4
-```
+Install gym and atari_py. Use `--env BreakoutDeterministic-v4` instead of the ROM file.
+
+## Performance
+
+Claimed performance in the paper can be reproduced, on several games I've tested with.
+
+![DQN](curve-breakout.png)
+
+| Environment  | Avg Score | Download                                                                              |
+|:-------------|:---------:|:-------------------------------------------------------------------------------------:|
+| breakout.bin | 465       | [:arrow_down:](http://models.tensorpack.com/DeepQNetwork/DoubleDQN-breakout.bin.npz)  |
+| seaquest.bin | 8686      | [:arrow_down:](http://models.tensorpack.com/DeepQNetwork/DoubleDQN-seaquest.bin.npz)  |
+
+## Speed
+
+On one GTX 1080Ti,
+the ALE version took
+__~2 hours__ of training to reach 21 (maximum) score on Pong,
+__~10 hours__ of training to reach 400 score on Breakout.
+
+It runs at 100 batches (6.4k trained frames, 400 seen frames, 1.6k game frames) per second on GTX 1080Ti.
+This is likely the fastest open source TF implementation of DQN.

 A3C code and models for Atari games in OpenAI Gym are released in [examples/A3C-Gym](../A3C-Gym)
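
The throughput figures quoted in the Speed section are consistent with the constants from the first hunk; an illustrative check (frame_skip=4 comes from get_player above):

```python
# Relating "100 batches per second" to the other counters (illustrative only).
BATCH_SIZE = 64
UPDATE_FREQ = 4   # new transitions collected per parameter update
FRAME_SKIP = 4    # ALE frame skip passed to AtariPlayer in get_player()

batches_per_sec = 100
print(batches_per_sec * BATCH_SIZE)                # 6400 trained frames per second
print(batches_per_sec * UPDATE_FREQ)               # 400 seen (new) transitions per second
print(batches_per_sec * UPDATE_FREQ * FRAME_SKIP)  # 1600 game frames per second
```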
@@ -19,7 +19,7 @@ from tensorpack.utils.utils import execute_only_once, get_rng
 __all__ = ['AtariPlayer']

-ROM_URL = "https://github.com/openai/atari-py/tree/master/atari_py/atari_roms"
+ROM_URL = "https://github.com/openai/atari-py/tree/gdb/atari_py/atari_roms"

 _ALE_LOCK = threading.Lock()

@@ -51,7 +51,7 @@ class AtariPlayer(gym.Env):
         if not os.path.isfile(rom_file) and '/' not in rom_file:
             rom_file = get_dataset_path('atari_rom', rom_file)
         assert os.path.isfile(rom_file), \
-            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)
+            "ROM {} not found. Please download at {}".format(rom_file, ROM_URL)
         try:
             ALEInterface.setLoggerMode(ALEInterface.Logger.Error)