Project: Shashank Suhas / seminar-breakout

Commit 399db3ae, authored Jun 03, 2017 by Yuxin Wu

    Merge branch 'dqn-improve'

Parents: 77755875, 1f94ae78

Showing 5 changed files, with 47 additions and 49 deletions (+47 / -49):
    examples/DeepQNetwork/DQN.py        +34  -31
    examples/DeepQNetwork/README.md      +3   -6
    examples/DeepQNetwork/expreplay.py   +6   -9
    tensorpack/callbacks/graph.py        +1   -1
    tensorpack/callbacks/param.py        +3   -2

examples/DeepQNetwork/DQN.py

@@ -30,18 +30,15 @@ from expreplay import ExpReplay
 BATCH_SIZE = 64
 IMAGE_SIZE = (84, 84)
 FRAME_HISTORY = 4
-ACTION_REPEAT = 4
+ACTION_REPEAT = 4   # aka FRAME_SKIP
+UPDATE_FREQ = 4
 
 GAMMA = 0.99
-INIT_EXPLORATION = 1
-EXPLORATION_EPOCH_ANNEAL = 0.01
-END_EXPLORATION = 0.1
 
 MEMORY_SIZE = 1e6
-# NOTE: will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
+# will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
 INIT_MEMORY_SIZE = 5e4
-STEPS_PER_EPOCH = 10000
+STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10  # each epoch is 100k played frames
 EVAL_EPISODE = 50
 
 NUM_ACTIONS = None
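
A quick sanity check of the epoch bookkeeping implied by these constants (not
part of the commit; plain Python, with values copied from above):

    # One gradient step is taken per UPDATE_FREQ played frames, and each
    # played frame repeats its action for ACTION_REPEAT game frames.
    UPDATE_FREQ = 4
    ACTION_REPEAT = 4
    STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10        # 25000 gradient steps
    played_frames = STEPS_PER_EPOCH * UPDATE_FREQ      # 100000
    game_frames = played_frames * ACTION_REPEAT        # 400000
    assert played_frames == 100000   # "each epoch is 100k played frames"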

@@ -73,18 +70,19 @@ class Model(DQNModel):
         with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True), \
                 argscope(LeakyReLU, alpha=0.01):
             l = (LinearWrap(image)
-                 .Conv2D('conv0', out_channel=32, kernel_shape=5)
-                 .MaxPooling('pool0', 2)
-                 .Conv2D('conv1', out_channel=32, kernel_shape=5)
-                 .MaxPooling('pool1', 2)
-                 .Conv2D('conv2', out_channel=64, kernel_shape=4)
-                 .MaxPooling('pool2', 2)
-                 .Conv2D('conv3', out_channel=64, kernel_shape=3)
-                 # the original arch is 2x faster
-                 # .Conv2D('conv0', out_channel=32, kernel_shape=8, stride=4)
-                 # .Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
-                 # .Conv2D('conv2', out_channel=64, kernel_shape=3)
+                 # Nature architecture
+                 .Conv2D('conv0', out_channel=32, kernel_shape=8, stride=4)
+                 .Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
+                 .Conv2D('conv2', out_channel=64, kernel_shape=3)
+                 # architecture used for the figure in the README, slower but takes fewer iterations to converge
+                 # .Conv2D('conv0', out_channel=32, kernel_shape=5)
+                 # .MaxPooling('pool0', 2)
+                 # .Conv2D('conv1', out_channel=32, kernel_shape=5)
+                 # .MaxPooling('pool1', 2)
+                 # .Conv2D('conv2', out_channel=64, kernel_shape=4)
+                 # .MaxPooling('pool2', 2)
+                 # .Conv2D('conv3', out_channel=64, kernel_shape=3)
                  .FullyConnected('fc0', 512, nl=LeakyReLU)())
         if self.method != 'Dueling':
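
For reference, the Nature stack shrinks the 84x84 input according to the
valid-convolution formula out = (in - kernel) // stride + 1. A small check
below (not part of the commit; this assumes VALID padding as in the DeepMind
paper, while tensorpack's Conv2D may pad differently by default):

    # Spatial sizes of the Nature conv stack on an 84x84 input,
    # assuming VALID padding.
    def conv_out(size, kernel, stride=1):
        return (size - kernel) // stride + 1

    size = 84
    for name, kernel, stride in [('conv0', 8, 4), ('conv1', 4, 2), ('conv2', 3, 1)]:
        size = conv_out(size, kernel, stride)
        print(name, size)        # conv0 20, conv1 9, conv2 7
    # fc0 then maps the flattened 7 * 7 * 64 = 3136 features to 512 units.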

@@ -98,8 +96,6 @@ class Model(DQNModel):
 
 def get_config():
-    logger.auto_set_dir()
-
     M = Model()
     expreplay = ExpReplay(
         predictor_io_names=(['state'], ['Qvalue']),

@@ -108,10 +104,8 @@ def get_config():
         batch_size=BATCH_SIZE,
         memory_size=MEMORY_SIZE,
         init_memory_size=INIT_MEMORY_SIZE,
-        exploration=INIT_EXPLORATION,
-        end_exploration=END_EXPLORATION,
-        exploration_epoch_anneal=EXPLORATION_EPOCH_ANNEAL,
-        update_frequency=4,
+        init_exploration=1.0,
+        update_frequency=UPDATE_FREQ,
         history_len=FRAME_HISTORY)

@@ -119,18 +113,24 @@ def get_config():
         dataflow=expreplay,
         callbacks=[
             ModelSaver(),
-            ScheduledHyperParamSetter('learning_rate',
-                                      [(150, 4e-4), (250, 1e-4), (350, 5e-5)]),
-            RunOp(DQNModel.update_target_param),
+            PeriodicTrigger(
+                RunOp(DQNModel.update_target_param),
+                every_k_steps=10000 // UPDATE_FREQ),    # update target network every 10k steps
             expreplay,
+            ScheduledHyperParamSetter('learning_rate',
+                                      [(60, 4e-4), (100, 2e-4)]),
+            ScheduledHyperParamSetter(
+                ObjAttrParam(expreplay, 'exploration'),
+                [(0, 1), (10, 0.1), (320, 0.01)],   # 1->0.1 in the first million steps
+                interp='linear'),
             PeriodicTrigger(Evaluator(
                 EVAL_EPISODE, ['state'], ['Qvalue'], get_player),
-                every_k_epochs=5),
-            # HumanHyperParamSetter('learning_rate', 'hyper.txt'),
-            # HumanHyperParamSetter(ObjAttrParam(expreplay, 'exploration'), 'hyper.txt'),
+                every_k_epochs=10),
             HumanHyperParamSetter('learning_rate'),
         ],
         model=M,
         steps_per_epoch=STEPS_PER_EPOCH,
         max_epoch=1000,
+        # run the simulator on a separate GPU if available
+        predict_tower=[1] if get_nr_gpu() > 1 else [0],
     )
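
Exploration is now driven entirely by ScheduledHyperParamSetter with
interp='linear': with epochs of 100k played frames, (0, 1) -> (10, 0.1) is
exactly the "1->0.1 in the first million steps" noted in the code. A sketch of
that linear interpolation (not tensorpack's implementation):

    # Linear interpolation over the exploration schedule
    # [(0, 1), (10, 0.1), (320, 0.01)].
    def value_at(epoch, schedule):
        for (e0, v0), (e1, v1) in zip(schedule, schedule[1:]):
            if e0 <= epoch <= e1:
                frac = float(epoch - e0) / (e1 - e0)
                return v0 + frac * (v1 - v0)
        return schedule[-1][1]

    schedule = [(0, 1), (10, 0.1), (320, 0.01)]
    print(value_at(5, schedule))    # 0.55, halfway from 1 to 0.1
    print(value_at(10, schedule))   # 0.1, reached after ~1M played frames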

@@ -170,6 +170,9 @@ if __name__ == '__main__':
     elif args.task == 'eval':
         eval_model_multithread(cfg, EVAL_EPISODE, get_player)
     else:
+        logger.set_logger_dir(
+            'train_log/DQN-{}'.format(
+                os.path.basename(ROM_FILE).split('.')[0]))
         config = get_config()
         if args.load:
             config.session_init = SaverRestore(args.load)
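
This replaces the old logger.auto_set_dir() with a log directory derived from
the ROM name; for example (illustrative value, not from the commit):

    import os
    ROM_FILE = 'breakout.bin'   # hypothetical value
    name = os.path.basename(ROM_FILE).split('.')[0]
    print('train_log/DQN-{}'.format(name))   # train_log/DQN-breakout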

examples/DeepQNetwork/README.md

@@ -19,13 +19,10 @@ Claimed performance in the paper can be reproduced, on several games I've tested
 
-DQN typically took 1 day of training to reach a score of 400 on breakout game (same as the paper).
-My Batch-A3C implementation only took <2 hours.
-Both were trained on one GPU with an extra GPU for simulation.
+On one TitanX, Double-DQN took 1 day of training to reach a score of 400 on breakout game.
+Batch-A3C implementation only took <2 hours. (Both are trained with a larger network noted in the code).
 
-Double-DQN runs at 18 batches/s (1152 frames/s) on TitanX.
-Note that I wasn't using the network architecture in the paper.
-If switched to the network in the paper it could run 2x faster.
+Double-DQN runs at 60 batches (3840 trained frames, 240 seen frames, 960 game frames) per second on TitanX.
 
 ## How to use
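
The new throughput line is consistent with the constants in DQN.py; checking
the arithmetic (not part of the commit):

    # Arithmetic behind "60 batches (3840 trained frames, 240 seen frames,
    # 960 game frames) per second".
    BATCH_SIZE = 64
    UPDATE_FREQ = 4
    ACTION_REPEAT = 4
    batches_per_s = 60
    trained = batches_per_s * BATCH_SIZE   # 3840 frames sampled from replay
    seen = batches_per_s * UPDATE_FREQ     # 240 new frames entering replay
    game = seen * ACTION_REPEAT            # 960 emulator frames
    assert (trained, seen, game) == (3840, 240, 960)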

examples/DeepQNetwork/expreplay.py

@@ -123,7 +123,7 @@ class ExpReplay(DataFlow, Callback):
                  state_shape,
                  batch_size, memory_size, init_memory_size,
-                 exploration, end_exploration, exploration_epoch_anneal,
+                 init_exploration,
                  update_frequency, history_len):
         """
         Args:
@@ -140,13 +140,13 @@ class ExpReplay(DataFlow, Callback):
         for k, v in locals().items():
             if k != 'self':
                 setattr(self, k, v)
+        self.exploration = init_exploration
         self.num_actions = player.get_action_space().num_actions()
         logger.info("Number of Legal actions: {}".format(self.num_actions))
 
         self.rng = get_rng(self)
         self._init_memory_flag = threading.Event()  # tell if memory has been initialized
-        # TODO just use a semaphore?
 
         # a queue to receive notifications to populate memory
         self._populate_job_queue = queue.Queue(maxsize=5)
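
The constructor uses the locals() trick to copy every argument onto self in
one loop; a minimal standalone sketch of the idiom (not the class itself):

    class Config(object):
        def __init__(self, batch_size, memory_size, history_len):
            # Copy every constructor argument onto the instance.
            for k, v in locals().items():
                if k != 'self':
                    setattr(self, k, v)

    c = Config(batch_size=64, memory_size=int(1e6), history_len=4)
    print(c.batch_size, c.memory_size, c.history_len)   # 64 1000000 4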
@@ -245,18 +245,15 @@ class ExpReplay(DataFlow, Callback):
         self._simulator_th.start()
 
     def _trigger_epoch(self):
-        if self.exploration > self.end_exploration:
-            self.exploration -= self.exploration_epoch_anneal
-            logger.info("Exploration changed to {}".format(self.exploration))
-        # log player statistics
+        # log player statistics in training
         stats = self.player.stats
         for k, v in six.iteritems(stats):
             try:
                 mean, max = np.mean(v), np.max(v)
-                self.trainer.add_scalar_summary('expreplay/mean_' + k, mean)
-                self.trainer.add_scalar_summary('expreplay/max_' + k, max)
+                self.trainer.monitors.put_scalar('expreplay/mean_' + k, mean)
+                self.trainer.monitors.put_scalar('expreplay/max_' + k, max)
             except:
-                pass
+                logger.exception("Cannot log training scores.")
         self.player.reset_stat()

tensorpack/callbacks/graph.py

@@ -27,7 +27,7 @@ class RunOp(Callback):
 
         Examples:
             The `DQN Example
-            <https://github.com/ppwwyyxx/tensorpack/blob/master/examples/Atari2600/DQN.py#L182>`_
+            <https://github.com/ppwwyyxx/tensorpack/blob/master/examples/DeepQNetwork/>`_
             uses this callback to update target network.
         """
         self.setup_func = setup_func
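
For context, the op that RunOp runs in the DQN example copies the online
network's weights into the target network. A minimal TensorFlow 1.x sketch of
such an op (not tensorpack's actual update_target_param; scope names are
illustrative):

    import tensorflow as tf

    def build_update_target_op(online_scope, target_scope):
        online = sorted(
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=online_scope),
            key=lambda v: v.name)
        target = sorted(
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=target_scope),
            key=lambda v: v.name)
        # Pair variables by sorted name and copy online -> target.
        assigns = [t.assign(o) for o, t in zip(online, target)]
        return tf.group(*assigns, name='update_target_network')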

tensorpack/callbacks/param.py

@@ -217,8 +217,9 @@ class ScheduledHyperParamSetter(HyperParamSetter):
             param: same as in :class:`HyperParamSetter`.
             schedule (list): with the format ``[(epoch1, val1), (epoch2, val2), (epoch3, val3)]``.
                 Each ``(ep, val)`` pair means to set the param
-                to "val" __after__ the completion of `ep` th epoch.
-                If ep == 0, the value will be set before the first epoch.
+                to "val" __after__ the completion of epoch `ep`.
+                If ep == 0, the value will be set before the first epoch
+                (by default the first is epoch 1).
             interp: None: no interpolation. 'linear': linear interpolation
 
         Example:
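
Concretely, with interp=None the value only changes at the listed epochs and
is held in between; a sketch of the documented semantics (not tensorpack's
code):

    def value_after_epoch(ep, schedule):
        v = None
        for e, val in schedule:
            if e <= ep:
                v = val          # set after the completion of epoch e
        return v                 # None means "leave the param unchanged"

    schedule = [(150, 4e-4), (250, 1e-4), (350, 5e-5)]
    print(value_after_epoch(200, schedule))   # 0.0004, held since epoch 150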