Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
1f3eaf97
Commit
1f3eaf97
authored
Jul 21, 2016
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bug fix in Double-DQN
parent
d6f1b6ee
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
8 deletions
+12
-8
examples/Atari2600/DQN.py
examples/Atari2600/DQN.py
+11
-8
tensorpack/utils/timer.py
tensorpack/utils/timer.py
+1
-0
No files found.
examples/Atari2600/DQN.py
View file @
1f3eaf97
...
@@ -92,24 +92,27 @@ class Model(ModelDesc):
...
@@ -92,24 +92,27 @@ class Model(ModelDesc):
def
_build_graph
(
self
,
inputs
,
is_training
):
def
_build_graph
(
self
,
inputs
,
is_training
):
state
,
action
,
reward
,
next_state
,
isOver
=
inputs
state
,
action
,
reward
,
next_state
,
isOver
=
inputs
self
.
predict_value
=
self
.
_get_DQN_prediction
(
state
,
is_training
)
self
.
predict_value
=
self
.
_get_DQN_prediction
(
state
,
is_training
)
action_onehot
=
tf
.
one_hot
(
action
,
NUM_ACTIONS
)
action_onehot
=
tf
.
one_hot
(
action
,
NUM_ACTIONS
,
1.0
,
0.0
)
pred_action_value
=
tf
.
reduce_sum
(
self
.
predict_value
*
action_onehot
,
1
)
#N,
pred_action_value
=
tf
.
reduce_sum
(
self
.
predict_value
*
action_onehot
,
1
)
#N,
max_pred_reward
=
tf
.
reduce_mean
(
tf
.
reduce_max
(
max_pred_reward
=
tf
.
reduce_mean
(
tf
.
reduce_max
(
self
.
predict_value
,
1
),
name
=
'predict_reward'
)
self
.
predict_value
,
1
),
name
=
'predict_reward'
)
add_moving_summary
(
max_pred_reward
)
add_moving_summary
(
max_pred_reward
)
self
.
greedy_choice
=
tf
.
argmax
(
self
.
predict_value
,
1
)
# N,
with
tf
.
variable_scope
(
'target'
):
with
tf
.
variable_scope
(
'target'
):
targetQ_predict_value
=
self
.
_get_DQN_prediction
(
next_state
,
False
)
# NxA
targetQ_predict_value
=
self
.
_get_DQN_prediction
(
next_state
,
False
)
# NxA
# DQN
# DQN
#best_v = tf.reduce_max(targetQ_predict_value, 1) # N,
#best_v = tf.reduce_max(targetQ_predict_value, 1) # N,
# Double-DQN
# Double-DQN
predict_onehot
=
tf
.
one_hot
(
self
.
greedy_choice
,
NUM_ACTIONS
,
1.0
,
0.0
)
tf
.
get_variable_scope
()
.
reuse_variables
()
best_v
=
tf
.
reduce_sum
(
targetQ_predict_value
*
predict_onehot
,
1
)
next_predict_value
=
self
.
_get_DQN_prediction
(
next_state
,
is_training
)
self
.
greedy_choice
=
tf
.
argmax
(
next_predict_value
,
1
)
# N,
predict_onehot
=
tf
.
one_hot
(
self
.
greedy_choice
,
NUM_ACTIONS
,
1.0
,
0.0
)
best_v
=
tf
.
reduce_sum
(
targetQ_predict_value
*
predict_onehot
,
1
)
target
=
reward
+
(
1.0
-
tf
.
cast
(
isOver
,
tf
.
float32
))
*
GAMMA
*
tf
.
stop_gradient
(
best_v
)
target
=
reward
+
(
1.0
-
tf
.
cast
(
isOver
,
tf
.
float32
))
*
GAMMA
*
tf
.
stop_gradient
(
best_v
)
sqrcost
=
tf
.
square
(
target
-
pred_action_value
)
sqrcost
=
tf
.
square
(
target
-
pred_action_value
)
abscost
=
tf
.
abs
(
target
-
pred_action_value
)
# robust error func
abscost
=
tf
.
abs
(
target
-
pred_action_value
)
# robust error func
...
...
tensorpack/utils/timer.py
View file @
1f3eaf97
...
@@ -17,6 +17,7 @@ __all__ = ['total_timer', 'timed_operation',
...
@@ -17,6 +17,7 @@ __all__ = ['total_timer', 'timed_operation',
'print_total_timer'
,
'IterSpeedCounter'
]
'print_total_timer'
,
'IterSpeedCounter'
]
class
IterSpeedCounter
(
object
):
class
IterSpeedCounter
(
object
):
""" To count how often some code gets reached"""
def
__init__
(
self
,
print_every
,
name
=
None
):
def
__init__
(
self
,
print_every
,
name
=
None
):
self
.
cnt
=
0
self
.
cnt
=
0
self
.
print_every
=
int
(
print_every
)
self
.
print_every
=
int
(
print_every
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment