Commit 1f94ae78 authored by Yuxin Wu

notes and docs

parent 38d02e34
@@ -36,7 +36,7 @@
 UPDATE_FREQ = 4
 GAMMA = 0.99
 MEMORY_SIZE = 1e6
-# NOTE: will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
+# will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
 INIT_MEMORY_SIZE = 5e4
 STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10  # each epoch is 100k played frames
 EVAL_EPISODE = 50
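
The two comments in this hunk encode some arithmetic worth spelling out. A quick sanity check, assuming one uint8 84x84 frame per stored state (which is what the one-byte-per-pixel figure implies):

```python
# Sanity check of the numbers in the comments above; assumes one uint8
# 84x84 frame per stored state, i.e. one byte per pixel.
MEMORY_SIZE = 1e6
UPDATE_FREQ = 4

replay_bytes = MEMORY_SIZE * 84 * 84
print(replay_bytes / 1024 ** 3)              # ~6.57 GiB, the "6.6G memory" above

STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10  # 25000 training steps
print(STEPS_PER_EPOCH * UPDATE_FREQ)         # 100000 played frames per epoch
```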
@@ -70,11 +70,12 @@ class Model(DQNModel):
         with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True), \
                 argscope(LeakyReLU, alpha=0.01):
             l = (LinearWrap(image)
-                 # the original arch is 2x faster
+                 # Nature architecture
                  .Conv2D('conv0', out_channel=32, kernel_shape=8, stride=4)
                  .Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
                  .Conv2D('conv2', out_channel=64, kernel_shape=3)
+                 # architecture used for the figure in the README, slower but takes fewer iterations to converge
                  # .Conv2D('conv0', out_channel=32, kernel_shape=5)
                  # .MaxPooling('pool0', 2)
                  # .Conv2D('conv1', out_channel=32, kernel_shape=5)
@@ -112,25 +113,24 @@ def get_config():
         dataflow=expreplay,
         callbacks=[
             ModelSaver(),
+            PeriodicTrigger(
+                RunOp(DQNModel.update_target_param),
+                every_k_steps=10000 // UPDATE_FREQ),  # update target network every 10k steps
+            expreplay,
             ScheduledHyperParamSetter('learning_rate',
                                       [(60, 4e-4), (100, 2e-4)]),
             ScheduledHyperParamSetter(
                 ObjAttrParam(expreplay, 'exploration'),
-                [(0, 1), (10, 0.1), (320, 0.01)],
+                [(0, 1), (10, 0.1), (320, 0.01)],  # 1->0.1 in the first million steps
                 interp='linear'),
-            PeriodicTrigger(
-                RunOp(DQNModel.update_target_param),
-                every_k_steps=10000 // UPDATE_FREQ),
-            expreplay,
             PeriodicTrigger(Evaluator(
                 EVAL_EPISODE, ['state'], ['Qvalue'], get_player),
                 every_k_epochs=10),
             HumanHyperParamSetter('learning_rate'),
-            # HumanHyperParamSetter(ObjAttrParam(expreplay, 'exploration'), 'hyper.txt'),
         ],
         model=M,
         steps_per_epoch=STEPS_PER_EPOCH,
-        max_epoch=3000,
+        max_epoch=1000,
         # run the simulator on a separate GPU if available
         predict_tower=[1] if get_nr_gpu() > 1 else [0],
     )
...
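
The exploration schedule in this hunk decays epsilon from 1 to 0.1 over the first 10 epochs and on to 0.01 by epoch 320, with linear interpolation between the listed points; with STEPS_PER_EPOCH = 25000 steps and UPDATE_FREQ = 4, 10 epochs correspond to exactly the first million played frames, which is what the new comment refers to. A minimal sketch of the schedule itself (an illustration only, not tensorpack's implementation):

```python
# Sketch of the linear schedule set on expreplay.exploration above:
# epsilon goes 1 -> 0.1 over the first 10 epochs, then 0.1 -> 0.01 by epoch 320.
# Illustration only, not tensorpack's implementation.
SCHEDULE = [(0, 1.0), (10, 0.1), (320, 0.01)]

def exploration_at(epoch):
    for (e0, v0), (e1, v1) in zip(SCHEDULE, SCHEDULE[1:]):
        if e0 <= epoch <= e1:
            frac = (epoch - e0) / float(e1 - e0)
            return v0 + frac * (v1 - v0)
    return SCHEDULE[-1][1]   # keep the final value after the last point

print(exploration_at(5))     # 0.55, halfway through the first decay
print(exploration_at(320))   # 0.01
```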
@@ -19,13 +19,10 @@ Claimed performance in the paper can be reproduced, on several games I've tested
 ![DQN](curve-breakout.png)

-DQN typically took 1 day of training to reach a score of 400 on breakout game (same as the paper).
-My Batch-A3C implementation only took <2 hours.
-Both were trained on one GPU with an extra GPU for simulation.
+On one TitanX, Double-DQN took 1 day of training to reach a score of 400 on breakout game.
+Batch-A3C implementation only took <2 hours. (Both are trained with a larger network noted in the code).

-Double-DQN runs at 18 batches/s (1152 frames/s) on TitanX.
-Note that I wasn't using the network architecture in the paper.
-If switched to the network in the paper it could run 2x faster.
+Double-DQN runs at 60 batches (3840 trained frames, 240 seen frames, 960 game frames) per second on TitanX.

 ## How to use
...
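
The three frame counts in the new throughput sentence are consistent with the training setup; a back-of-the-envelope check, where the batch size of 64 and the frame skip of 4 are assumptions about this example rather than something stated in the diff:

```python
# Back-of-the-envelope check of the README throughput numbers.
# BATCH_SIZE = 64 and FRAME_SKIP = 4 are assumptions, not shown in this diff.
BATCH_SIZE = 64
UPDATE_FREQ = 4   # one training batch every 4 environment steps
FRAME_SKIP = 4    # each action is repeated for 4 game frames

batches_per_sec = 60
print(batches_per_sec * BATCH_SIZE)                 # 3840 trained frames / s
print(batches_per_sec * UPDATE_FREQ)                # 240 seen frames / s
print(batches_per_sec * UPDATE_FREQ * FRAME_SKIP)   # 960 game frames / s
```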
@@ -217,8 +217,9 @@ class ScheduledHyperParamSetter(HyperParamSetter):
         param: same as in :class:`HyperParamSetter`.
         schedule (list): with the format ``[(epoch1, val1), (epoch2, val2), (epoch3, val3)]``.
             Each ``(ep, val)`` pair means to set the param
-            to "val" __after__ the completion of `ep` th epoch.
-            If ep == 0, the value will be set before the first epoch.
+            to "val" __after__ the completion of epoch `ep`.
+            If ep == 0, the value will be set before the first epoch
+            (by default the first is epoch 1).
         interp: None: no interpolation. 'linear': linear interpolation

     Example:
...
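
A minimal usage sketch of the schedule format documented here, reusing the learning-rate schedule from the DQN config in this same commit (the import path is an assumption about the package layout):

```python
# Minimal usage sketch of the schedule format documented above, reusing the
# learning-rate schedule from the DQN config in this commit.
# The import path is an assumption about the package layout.
from tensorpack.callbacks import ScheduledHyperParamSetter

lr_setter = ScheduledHyperParamSetter(
    'learning_rate',
    [(60, 4e-4), (100, 2e-4)])   # each value takes effect after the listed epoch
```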