Shashank Suhas / seminar-breakout / Commits

Commit ec726f6c
authored May 31, 2016 by Yuxin Wu
parent da9b1b2f

clean-ups in DQN

Showing 2 changed files with 14 additions and 8 deletions:

    examples/Atari2600/DQN.py     +7 -7
    tensorpack/RL/simulator.py    +7 -1

examples/Atari2600/DQN.py
@@ -30,8 +30,9 @@ from tensorpack.RL import *
 """
 Implement DQN in:
-Human-level control through deep reinforcement learning
-for atari games
+Human-level Control Through Deep Reinforcement Learning
+for atari games. Use the variants in:
+Deep Reinforcement Learning with Double Q-learning.
 """
 
 BATCH_SIZE = 32
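The second paper now cited in the docstring, Deep Reinforcement Learning with Double Q-learning, changes only how the bootstrap target is formed: the online network selects the next action and the frozen target network evaluates it, which reduces the overestimation bias of vanilla Q-learning. A minimal sketch of that target computation, with illustrative names not taken from this commit:

    import numpy as np

    def double_dqn_target(reward, next_q_online, next_q_target,
                          gamma=0.99, done=False):
        # next_q_online / next_q_target: 1-D arrays of Q(s', a) from the
        # online and target networks respectively.
        if done:
            return reward
        best_action = int(np.argmax(next_q_online))         # select with online net
        return reward + gamma * next_q_target[best_action]  # evaluate with target net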
@@ -139,10 +140,8 @@ class Model(ModelDesc):
                 tf.clip_by_global_norm([grad], 5)[0][0]),
                 SummaryGradient()]
 
-def current_predictor(state):
-    pred_var = tf.get_default_graph().get_tensor_by_name('fct/output:0')
-    pred = pred_var.eval(feed_dict={'state:0': [state]})
-    return pred[0]
+    def predictor(self, state):
+        return self.predict_value.eval(feed_dict={'state:0': [state]})[0]
 
 def play_one_episode(player, func, verbose=False):
     while True:
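This clean-up replaces a free function that re-fetched the output tensor by name on every call with a Model method holding a direct Python reference (self.predict_value). Both styles work in the TF-0.x-era API this code uses; a self-contained sketch of the two, on an illustrative toy graph:

    import tensorflow as tf  # TF 0.x/1.x-style API, matching this 2016 code

    g = tf.Graph()
    with g.as_default():
        state = tf.placeholder(tf.float32, [None, 4], name='state')
        output = tf.identity(state * 2.0, name='fct/output')

    with tf.Session(graph=g):
        # (a) by-name lookup, as the removed current_predictor() did:
        pred_var = g.get_tensor_by_name('fct/output:0')
        print(pred_var.eval(feed_dict={'state:0': [[1., 2., 3., 4.]]})[0])
        # (b) a kept reference, as Model.predictor now does via
        #     self.predict_value -- no string lookup needed:
        print(output.eval(feed_dict={state: [[1., 2., 3., 4.]]})[0])

A bound method like M.predictor also works anywhere a plain callable is expected, which is what lets get_config() pass it straight to ExpReplay below.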
@@ -237,7 +236,7 @@ def get_config():
     M = Model()
 
     dataset_train = ExpReplay(
-            predictor=current_predictor,
+            predictor=M.predictor,
             player=get_player(train=True),
             num_actions=NUM_ACTIONS,
             memory_size=MEMORY_SIZE,
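ExpReplay is tensorpack's experience-replay DataFlow: it presumably drives the player with the supplied predictor (epsilon-greedy) and samples training batches from a bounded transition memory. A toy sketch of the buffer idea only; the real ExpReplay additionally handles frame-history stacking, exploration annealing, and the update_frequency schedule seen below:

    import random
    from collections import deque

    class ToyReplayBuffer:
        """Illustrative only; not tensorpack's ExpReplay."""
        def __init__(self, memory_size):
            self.mem = deque(maxlen=memory_size)  # oldest transitions fall off

        def append(self, state, action, reward, is_over):
            self.mem.append((state, action, reward, is_over))

        def sample(self, batch_size):
            # Uniform sampling breaks the temporal correlation of transitions.
            return random.sample(self.mem, batch_size)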
@@ -246,6 +245,7 @@ def get_config():
             exploration=INIT_EXPLORATION,
             end_exploration=END_EXPLORATION,
             exploration_epoch_anneal=EXPLORATION_EPOCH_ANNEAL,
             update_frequency=4,
+            reward_clip=(-1, 1),
             history_len=FRAME_HISTORY)
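The newly added reward_clip=(-1, 1) argument matches the DQN paper's trick of clipping every per-step reward into [-1, 1], so that score scales that differ wildly across Atari games produce comparably sized gradients. Equivalent to something like the following (illustrative, not tensorpack's code):

    import numpy as np

    def clip_reward(r, bounds=(-1.0, 1.0)):
        # Clip a raw emulator reward into [lo, hi] before it is stored
        # in the replay memory.
        lo, hi = bounds
        return float(np.clip(r, lo, hi))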
tensorpack/RL/simulator.py

@@ -5,7 +5,6 @@
 import multiprocessing
 import threading
-import zmq
 import weakref
 
 from abc import abstractmethod, ABCMeta
 from collections import defaultdict, namedtuple
@@ -15,6 +14,13 @@ from tensorpack.utils.concurrency import *
 
 __all__ = ['SimulatorProcess', 'SimulatorMaster']
 
+try:
+    import zmq
+except ImportError:
+    logger.warn("Error in 'import zmq'. RL simulator won't be available.")
+    __all__ = []
+
+
 class SimulatorProcess(multiprocessing.Process):
     """ A process that simulates a player """
     __metaclass__ = ABCMeta
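Moving import zmq into a try/except turns pyzmq into an optional dependency: importing tensorpack no longer fails on machines without it, and the module simply exports nothing (__all__ = []) so the simulator classes stay hidden from star-imports. The general pattern, in a self-contained sketch with illustrative names:

    import logging
    logger = logging.getLogger(__name__)

    __all__ = ['SimulatorProcess', 'SimulatorMaster']

    try:
        import zmq  # optional: only the RL simulator needs it
    except ImportError:
        logger.warning("Error in 'import zmq'. RL simulator won't be available.")
        __all__ = []  # `from module import *` now exports nothing

The classes themselves can still be defined below the guard, as they are here; they only fail at the point where a method actually touches zmq.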