Commit 00f83c13 authored by Yuxin Wu

Update DQN models

parent 81127236
@@ -20,12 +20,12 @@ from expreplay import ExpReplay
BATCH_SIZE = 64
IMAGE_SIZE = (84, 84)
FRAME_HISTORY = 4
UPDATE_FREQ = 4
UPDATE_FREQ = 4 # the number of new state transitions per parameter update (per training step)
MEMORY_SIZE = 1e6
# will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
INIT_MEMORY_SIZE = MEMORY_SIZE // 20
STEPS_PER_EPOCH = 100000 // UPDATE_FREQ # each epoch is 100k played frames
STEPS_PER_EPOCH = 100000 // UPDATE_FREQ # each epoch is 100k state transitions
EVAL_EPISODE = 50
NUM_PARALLEL_PLAYERS = 3
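Taken together, these constants fix the bookkeeping of training. A small plain-Python sketch of the arithmetic they imply (illustrative only, not part of the code):

```
# Bookkeeping implied by the constants above (illustrative sketch).
BATCH_SIZE = 64
IMAGE_SIZE = (84, 84)
UPDATE_FREQ = 4                 # new state transitions per parameter update
MEMORY_SIZE = int(1e6)
STEPS_PER_EPOCH = 100000 // UPDATE_FREQ

transitions_per_epoch = STEPS_PER_EPOCH * UPDATE_FREQ        # 100,000 per epoch
samples_trained_per_step = BATCH_SIZE                        # 64 samples drawn from replay per step
replay_bytes = MEMORY_SIZE * IMAGE_SIZE[0] * IMAGE_SIZE[1]   # ~7.1e9 bytes ~= 6.6 GiB
print(transitions_per_epoch, samples_trained_per_step, replay_bytes / 2.0 ** 30)
```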
@@ -46,7 +46,6 @@ def get_player(viz=False, train=False):
env = gym.make(ENV_NAME)
else:
from atari import AtariPlayer
# frame_skip=4 is what's used in the original paper
env = AtariPlayer(ENV_NAME, frame_skip=4, viz=viz,
live_lost_as_eoe=train, max_num_frames=60000)
env = FireResetEnv(env)
@@ -127,21 +126,20 @@ def get_config(model):
ModelSaver(),
PeriodicTrigger(
RunOp(DQNModel.update_target_param, verbose=True),
every_k_steps=10000 // UPDATE_FREQ), # update target network every 10k steps
every_k_steps=5000), # update target network every 5k steps
expreplay,
ScheduledHyperParamSetter('learning_rate',
[(0, 1e-3), (60, 4e-4), (100, 2e-4), (500, 5e-5)]),
[(0, 1e-3), (60, 5e-4), (400, 1e-4)]),
ScheduledHyperParamSetter(
ObjAttrParam(expreplay, 'exploration'),
[(0, 1), (10, 0.1), (320, 0.01)], # 1->0.1 in the first million steps
[(0, 1), (10, 0.1), (400, 0.01)], # 1->0.1 in the first million steps
interp='linear'),
PeriodicTrigger(Evaluator(
EVAL_EPISODE, ['state'], ['Qvalue'], get_player),
every_k_epochs=5 if 'pong' in args.env.lower() else 10), # eval more frequently for easy games
HumanHyperParamSetter('learning_rate'),
],
steps_per_epoch=STEPS_PER_EPOCH,
max_epoch=800,
max_epoch=500, # a total of 50M state transitions
)
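The hyperparameter schedules above are indexed by epoch, and with 100k state transitions per epoch the numbers line up with the comments: exploration reaches 0.1 at epoch 10 (the first million transitions), and `max_epoch=500` corresponds to 50M transitions in total. A rough plain-Python sketch of how such an epoch-indexed linear schedule behaves (illustrative only; the actual work is done by tensorpack's `ScheduledHyperParamSetter`):

```
# Illustrative sketch of an epoch-indexed, linearly interpolated schedule
# (the real interpolation is done by ScheduledHyperParamSetter with interp='linear').
def schedule_value(schedule, epoch):
    """schedule: sorted (epoch, value) pairs; linear interpolation in between, clamped after the end."""
    e0, v0 = schedule[0]
    for e1, v1 in schedule[1:]:
        if epoch <= e1:
            frac = (epoch - e0) / float(e1 - e0)
            return v0 + frac * (v1 - v0)
        e0, v0 = e1, v1
    return schedule[-1][1]

exploration = [(0, 1), (10, 0.1), (400, 0.01)]
assert abs(schedule_value(exploration, 10) - 0.1) < 1e-9  # epoch 10 == 10 * 100k = 1M transitions
total_transitions = 500 * 100000                          # max_epoch=500 -> 50M state transitions
```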
@@ -100,7 +100,8 @@ class Model(ModelDesc):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
opt = tf.train.RMSPropOptimizer(lr, epsilon=1e-5)
tf.summary.scalar("learning_rate", lr)
opt = tf.train.RMSPropOptimizer(lr, decay=0.95, momentum=0.95, epsilon=1e-2)
return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])
@staticmethod
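Read together, the hunk above replaces the old RMSProp settings (epsilon 1e-5) with decay 0.95, momentum 0.95 and epsilon 1e-2, and adds a learning-rate summary. A sketch of how the method reads after the change, assuming the `optimizer` and `gradproc` tensorpack modules already imported at the top of the file:

```
# Sketch of optimizer() after this change (reconstructed from the hunk above;
# `optimizer` and `gradproc` are the tensorpack modules imported at the top of the file).
def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
    tf.summary.scalar("learning_rate", lr)
    opt = tf.train.RMSPropOptimizer(lr, decay=0.95, momentum=0.95, epsilon=1e-2)
    return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])
```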
@@ -15,26 +15,14 @@ Reproduce (performance of) the following reinforcement learning methods:
+ A3C in [Asynchronous Methods for Deep Reinforcement Learning](http://arxiv.org/abs/1602.01783). (I
used a modified version where each batch contains transitions from different simulators, which I called "Batch-A3C".)
## Performance & Speed
Claimed performance in the paper can be reproduced, on several games I've tested with.
![DQN](curve-breakout.png)
On one GTX 1080Ti,
the ALE version took
__~2 hours__ of training to reach 21 (maximum) score on Pong,
__~10 hours__ of training to reach 400 score on Breakout.
It runs at 100 batches (6.4k trained frames, 400 seen frames, 1.6k game frames) per second on GTX 1080Ti.
This is likely the fastest open source TF implementation of DQN.
## How to use
## Usage:
### With ALE (paper's setting):
Install [ALE](https://github.com/mgbellemare/Arcade-Learning-Environment) and gym.
Download an [atari rom](https://github.com/openai/atari-py/tree/master/atari_py/atari_roms), e.g.:
Download an [atari rom](https://github.com/openai/atari-py/tree/gdb/atari_py/atari_roms), e.g.:
```
wget https://github.com/openai/atari-py/raw/master/atari_py/atari_roms/breakout.bin
wget https://github.com/openai/atari-py/raw/gdb/atari_py/atari_roms/breakout.bin
```
Start Training:
@@ -46,16 +34,35 @@ Start Training:
Watch the agent play:
```
# Download pretrained models or use one you trained:
wget http://models.tensorpack.com/DeepQNetwork/DoubleDQN-Breakout.npz
./DQN.py --env breakout.bin --task play --load DoubleDQN-Breakout.npz
wget http://models.tensorpack.com/DeepQNetwork/DoubleDQN-breakout.bin.npz
./DQN.py --env breakout.bin --task play --load DoubleDQN-breakout.bin.npz
```
Evaluation of 50 episodes:
```
./DQN.py --env breakout.bin --task eval --load DoubleDQN-breakout.bin.npz
```
### With gym's Atari:
Install gym and atari_py.
Install gym and atari_py. Use `--env BreakoutDeterministic-v4` instead of the ROM file.
```
./DQN.py --env BreakoutDeterministic-v4
```
## Performance
Claimed performance in the paper can be reproduced on several games I've tested.
![DQN](curve-breakout.png)
| Environment | Avg Score | Download |
|:-------------|:---------:|:------------------------------------------------------------------------------------:|
| breakout.bin | 465 | [:arrow_down:](http://models.tensorpack.com/DeepQNetwork/DoubleDQN-breakout.bin.npz) |
| seaquest.bin | 8686 | [:arrow_down:](http://models.tensorpack.com/DeepQNetwork/DoubleDQN-seaquest.bin.npz) |
## Speed
On one GTX 1080Ti, the ALE version took
__~2 hours__ of training to reach a score of 21 (the maximum) on Pong, and
__~10 hours__ of training to reach a score of 400 on Breakout.

It runs at 100 batches (6.4k trained frames, 400 seen frames, 1.6k game frames) per second on a GTX 1080Ti.
This is likely the fastest open source TF implementation of DQN.
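Those throughput figures follow directly from the hyperparameters (batch size 64, one update per 4 new transitions, frame skip 4); a quick sanity check:

```
# Sanity check of the per-second figures above (constants from DQN.py / AtariPlayer).
BATCHES_PER_SEC = 100
BATCH_SIZE = 64      # samples trained per batch
UPDATE_FREQ = 4      # new state transitions per batch
FRAME_SKIP = 4       # emulator frames per state transition

trained_frames = BATCHES_PER_SEC * BATCH_SIZE    # 6,400 trained frames / s
seen_frames = BATCHES_PER_SEC * UPDATE_FREQ      # 400 seen frames / s
game_frames = seen_frames * FRAME_SKIP           # 1,600 game frames / s
print(trained_frames, seen_frames, game_frames)
```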
A3C code and models for Atari games in OpenAI Gym are released in [examples/A3C-Gym](../A3C-Gym).
@@ -19,7 +19,7 @@ from tensorpack.utils.utils import execute_only_once, get_rng
__all__ = ['AtariPlayer']
ROM_URL = "https://github.com/openai/atari-py/tree/master/atari_py/atari_roms"
ROM_URL = "https://github.com/openai/atari-py/tree/gdb/atari_py/atari_roms"
_ALE_LOCK = threading.Lock()
@@ -51,7 +51,7 @@ class AtariPlayer(gym.Env):
if not os.path.isfile(rom_file) and '/' not in rom_file:
rom_file = get_dataset_path('atari_rom', rom_file)
assert os.path.isfile(rom_file), \
"rom {} not found. Please download at {}".format(rom_file, ROM_URL)
"ROM {} not found. Please download at {}".format(rom_file, ROM_URL)
try:
ALEInterface.setLoggerMode(ALEInterface.Logger.Error)