Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
a8dfad63
Commit
a8dfad63
authored
May 30, 2017
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
match "steps"
parent
b61a2d89
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
14 deletions
+17
-14
examples/DeepQNetwork/DQN.py
examples/DeepQNetwork/DQN.py
+12
-8
examples/DeepQNetwork/expreplay.py
examples/DeepQNetwork/expreplay.py
+4
-5
tensorpack/callbacks/graph.py
tensorpack/callbacks/graph.py
+1
-1
No files found.
examples/DeepQNetwork/DQN.py
View file @
a8dfad63
...
...
@@ -30,14 +30,15 @@ from expreplay import ExpReplay
BATCH_SIZE
=
64
IMAGE_SIZE
=
(
84
,
84
)
FRAME_HISTORY
=
4
ACTION_REPEAT
=
4
ACTION_REPEAT
=
4
# aka FRAME_SKIP
UPDATE_FREQ
=
4
GAMMA
=
0.99
MEMORY_SIZE
=
1e6
# NOTE: will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
INIT_MEMORY_SIZE
=
5e4
STEPS_PER_EPOCH
=
10000
STEPS_PER_EPOCH
=
10000
//
UPDATE_FREQ
*
10
# each epoch is 100k played frames
EVAL_EPISODE
=
50
NUM_ACTIONS
=
None
...
...
@@ -97,7 +98,7 @@ def get_config():
memory_size
=
MEMORY_SIZE
,
init_memory_size
=
INIT_MEMORY_SIZE
,
init_exploration
=
1.0
,
update_frequency
=
4
,
update_frequency
=
UPDATE_FREQ
,
history_len
=
FRAME_HISTORY
)
...
...
@@ -106,21 +107,24 @@ def get_config():
callbacks
=
[
ModelSaver
(),
ScheduledHyperParamSetter
(
'learning_rate'
,
[(
150
,
4e-4
),
(
250
,
1e-4
),
(
350
,
5e-5
)]),
[(
60
,
4e-4
),
(
100
,
2e-4
)]),
ScheduledHyperParamSetter
(
ObjAttrParam
(
expreplay
,
'exploration'
),
[(
0
,
1
),
(
10
0
,
0.1
),
(
20
0
,
0.01
)],
[(
0
,
1
),
(
10
,
0.1
),
(
24
0
,
0.01
)],
interp
=
'linear'
),
RunOp
(
DQNModel
.
update_target_param
),
PeriodicTrigger
(
RunOp
(
DQNModel
.
update_target_param
),
every_k_steps
=
10000
//
UPDATE_FREQ
),
expreplay
,
PeriodicTrigger
(
Evaluator
(
EVAL_EPISODE
,
[
'state'
],
[
'Qvalue'
],
get_player
),
every_k_epochs
=
5
),
# HumanHyperParamSetter('learning_rate', 'hyper.txt
'),
every_k_epochs
=
10
),
HumanHyperParamSetter
(
'learning_rate
'
),
# HumanHyperParamSetter(ObjAttrParam(expreplay, 'exploration'), 'hyper.txt'),
],
model
=
M
,
steps_per_epoch
=
STEPS_PER_EPOCH
,
max_epoch
=
3000
,
# run the simulator on a separate GPU if available
predict_tower
=
[
1
]
if
get_nr_gpu
()
>
1
else
[
0
],
)
...
...
examples/DeepQNetwork/expreplay.py
View file @
a8dfad63
...
...
@@ -147,7 +147,6 @@ class ExpReplay(DataFlow, Callback):
self
.
rng
=
get_rng
(
self
)
self
.
_init_memory_flag
=
threading
.
Event
()
# tell if memory has been initialized
# TODO just use a semaphore?
# a queue to receive notifications to populate memory
self
.
_populate_job_queue
=
queue
.
Queue
(
maxsize
=
5
)
...
...
@@ -246,15 +245,15 @@ class ExpReplay(DataFlow, Callback):
self
.
_simulator_th
.
start
()
def
_trigger_epoch
(
self
):
# log player statistics
# log player statistics
in training
stats
=
self
.
player
.
stats
for
k
,
v
in
six
.
iteritems
(
stats
):
try
:
mean
,
max
=
np
.
mean
(
v
),
np
.
max
(
v
)
self
.
trainer
.
add_scalar_summary
(
'expreplay/mean_'
+
k
,
mean
)
self
.
trainer
.
add_scalar_summary
(
'expreplay/max_'
+
k
,
max
)
self
.
trainer
.
monitors
.
put_scalar
(
'expreplay/mean_'
+
k
,
mean
)
self
.
trainer
.
monitors
.
put_scalar
(
'expreplay/max_'
+
k
,
max
)
except
:
pass
logger
.
exception
(
"Cannot log training scores."
)
self
.
player
.
reset_stat
()
...
...
tensorpack/callbacks/graph.py
View file @
a8dfad63
...
...
@@ -27,7 +27,7 @@ class RunOp(Callback):
Examples:
The `DQN Example
<https://github.com/ppwwyyxx/tensorpack/blob/master/examples/
Atari2600/DQN.py#L182
>`_
<https://github.com/ppwwyyxx/tensorpack/blob/master/examples/
DeepQNetwork/
>`_
uses this callback to update target network.
"""
self
.
setup_func
=
setup_func
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment