Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
1870496f
Commit
1870496f
authored
Aug 18, 2019
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DQN] update docs; make eval configurable
parent
5fc1e2f9
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
9 deletions
+10
-9
examples/DeepQNetwork/DQN.py
examples/DeepQNetwork/DQN.py
+3
-3
examples/DeepQNetwork/DQNModel.py
examples/DeepQNetwork/DQNModel.py
+1
-1
examples/DeepQNetwork/README.md
examples/DeepQNetwork/README.md
+6
-5
No files found.
examples/DeepQNetwork/DQN.py
View file @
1870496f
...
@@ -26,7 +26,6 @@ MEMORY_SIZE = 1e6
...
@@ -26,7 +26,6 @@ MEMORY_SIZE = 1e6
# will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
# will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
INIT_MEMORY_SIZE
=
MEMORY_SIZE
//
20
INIT_MEMORY_SIZE
=
MEMORY_SIZE
//
20
STEPS_PER_EPOCH
=
100000
//
UPDATE_FREQ
# each epoch is 100k state transitions
STEPS_PER_EPOCH
=
100000
//
UPDATE_FREQ
# each epoch is 100k state transitions
EVAL_EPISODE
=
50
NUM_PARALLEL_PLAYERS
=
3
NUM_PARALLEL_PLAYERS
=
3
USE_GYM
=
False
USE_GYM
=
False
...
@@ -135,7 +134,7 @@ def get_config(model):
...
@@ -135,7 +134,7 @@ def get_config(model):
[(
0
,
1
),
(
10
,
0.1
),
(
400
,
0.01
)],
# 1->0.1 in the first million steps
[(
0
,
1
),
(
10
,
0.1
),
(
400
,
0.01
)],
# 1->0.1 in the first million steps
interp
=
'linear'
),
interp
=
'linear'
),
PeriodicTrigger
(
Evaluator
(
PeriodicTrigger
(
Evaluator
(
EVAL_EPISODE
,
[
'state'
],
[
'Qvalue'
],
get_player
),
args
.
num_eval
,
[
'state'
],
[
'Qvalue'
],
get_player
),
every_k_epochs
=
5
if
'pong'
in
args
.
env
.
lower
()
else
10
),
# eval more frequently for easy games
every_k_epochs
=
5
if
'pong'
in
args
.
env
.
lower
()
else
10
),
# eval more frequently for easy games
],
],
steps_per_epoch
=
STEPS_PER_EPOCH
,
steps_per_epoch
=
STEPS_PER_EPOCH
,
...
@@ -153,6 +152,7 @@ if __name__ == '__main__':
...
@@ -153,6 +152,7 @@ if __name__ == '__main__':
help
=
'either an atari rom file (that ends with .bin) or a gym atari environment name'
)
help
=
'either an atari rom file (that ends with .bin) or a gym atari environment name'
)
parser
.
add_argument
(
'--algo'
,
help
=
'algorithm'
,
parser
.
add_argument
(
'--algo'
,
help
=
'algorithm'
,
choices
=
[
'DQN'
,
'Double'
,
'Dueling'
],
default
=
'Double'
)
choices
=
[
'DQN'
,
'Double'
,
'Dueling'
],
default
=
'Double'
)
parser
.
add_argument
(
'--num-eval'
,
default
=
50
,
type
=
int
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
if
args
.
gpu
:
if
args
.
gpu
:
os
.
environ
[
'CUDA_VISIBLE_DEVICES'
]
=
args
.
gpu
os
.
environ
[
'CUDA_VISIBLE_DEVICES'
]
=
args
.
gpu
...
@@ -177,7 +177,7 @@ if __name__ == '__main__':
...
@@ -177,7 +177,7 @@ if __name__ == '__main__':
if
args
.
task
==
'play'
:
if
args
.
task
==
'play'
:
play_n_episodes
(
get_player
(
viz
=
0.01
),
pred
,
100
,
render
=
True
)
play_n_episodes
(
get_player
(
viz
=
0.01
),
pred
,
100
,
render
=
True
)
elif
args
.
task
==
'eval'
:
elif
args
.
task
==
'eval'
:
eval_model_multithread
(
pred
,
EVAL_EPISODE
,
get_player
)
eval_model_multithread
(
pred
,
args
.
num_eval
,
get_player
)
else
:
else
:
logger
.
set_logger_dir
(
logger
.
set_logger_dir
(
os
.
path
.
join
(
'train_log'
,
'DQN-{}'
.
format
(
os
.
path
.
join
(
'train_log'
,
'DQN-{}'
.
format
(
...
...
examples/DeepQNetwork/DQNModel.py
View file @
1870496f
...
@@ -100,7 +100,7 @@ class Model(ModelDesc):
...
@@ -100,7 +100,7 @@ class Model(ModelDesc):
def
optimizer
(
self
):
def
optimizer
(
self
):
lr
=
tf
.
get_variable
(
'learning_rate'
,
initializer
=
1e-3
,
trainable
=
False
)
lr
=
tf
.
get_variable
(
'learning_rate'
,
initializer
=
1e-3
,
trainable
=
False
)
tf
.
summary
.
scalar
(
"learning_rate"
,
lr
)
tf
.
summary
.
scalar
(
"learning_rate
-summary
"
,
lr
)
opt
=
tf
.
train
.
RMSPropOptimizer
(
lr
,
decay
=
0.95
,
momentum
=
0.95
,
epsilon
=
1e-2
)
opt
=
tf
.
train
.
RMSPropOptimizer
(
lr
,
decay
=
0.95
,
momentum
=
0.95
,
epsilon
=
1e-2
)
return
optimizer
.
apply_grad_processors
(
opt
,
[
gradproc
.
SummaryGradient
()])
return
optimizer
.
apply_grad_processors
(
opt
,
[
gradproc
.
SummaryGradient
()])
...
...
examples/DeepQNetwork/README.md
View file @
1870496f
...
@@ -54,10 +54,11 @@ Claimed performance in the paper can be reproduced, on several games I've tested
...
@@ -54,10 +54,11 @@ Claimed performance in the paper can be reproduced, on several games I've tested


| Environment | Avg Score | Download |
| Environment | Avg Score | Download |
|:--------------
|:---------:|:
-------------------------------------------------------------------------------------:|
|:--------------
-|:---------:|:-
-------------------------------------------------------------------------------------:|
| breakout.bin | 465 |
[
:arrow_down:
](
http://models.tensorpack.com/DeepQNetwork/DoubleDQN-breakout.bin.npz
)
|
| breakout.bin | 465 |
[
:arrow_down:
](
http://models.tensorpack.com/DeepQNetwork/DoubleDQN-breakout.bin.npz
)
|
| seaquest.bin | 8686 |
[
:arrow_down:
](
http://models.tensorpack.com/DeepQNetwork/DoubleDQN-seaquest.bin.npz
)
|
| seaquest.bin | 8686 |
[
:arrow_down:
](
http://models.tensorpack.com/DeepQNetwork/DoubleDQN-seaquest.bin.npz
)
|
| ms_pacman.bin | 3323 |
[
:arrow_down:
](
http://models.tensorpack.com/DeepQNetwork/DoubleDQN-ms_pacman.bin.npz
)
|
| ms_pacman.bin | 3323 |
[
:arrow_down:
](
http://models.tensorpack.com/DeepQNetwork/DoubleDQN-ms_pacman.bin.npz
)
|
| beam_rider.bin | 15835 |
[
:arrow_down:
](
http://models.tensorpack.com/DeepQNetwork/DoubleDQN-beam_rider.bin.npz
)
|
## Speed
## Speed
On one GTX 1080Ti,
On one GTX 1080Ti,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment