Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
80723d31
Commit
80723d31
authored
May 28, 2016
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
gpu eval & one_hot
parent
f15c2181
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
25 additions
and
30 deletions
+25
-30
examples/Atari2600/DQN.py
examples/Atari2600/DQN.py
+13
-18
tensorpack/predict/common.py
tensorpack/predict/common.py
+2
-1
tensorpack/tfutils/symbolic_functions.py
tensorpack/tfutils/symbolic_functions.py
+3
-9
tensorpack/utils/utils.py
tensorpack/utils/utils.py
+7
-2
No files found.
examples/Atari2600/DQN.py
View file @
80723d31
...
...
@@ -22,7 +22,7 @@ from tensorpack.predict import PredictConfig, get_predict_func, MultiProcessPred
from
tensorpack.tfutils
import
symbolic_functions
as
symbf
from
tensorpack.callbacks
import
*
from
tensorpack.RL
import
AtariPlayer
,
ExpReplay
from
tensorpack.RL
import
*
"""
Implement DQN in:
...
...
@@ -43,7 +43,7 @@ EXPLORATION_EPOCH_ANNEAL = 0.008
END_EXPLORATION
=
0.1
MEMORY_SIZE
=
1e6
INIT_MEMORY_SIZE
=
500
00
INIT_MEMORY_SIZE
=
500
STEP_PER_EPOCH
=
10000
EVAL_EPISODE
=
100
...
...
@@ -86,7 +86,7 @@ class Model(ModelDesc):
def
_build_graph
(
self
,
inputs
,
is_training
):
state
,
action
,
reward
,
next_state
,
isOver
=
inputs
self
.
predict_value
=
self
.
_get_DQN_prediction
(
state
,
is_training
)
action_onehot
=
symbf
.
one_hot
(
action
,
NUM_ACTIONS
)
action_onehot
=
tf
.
one_hot
(
action
,
NUM_ACTIONS
,
1.0
,
0.0
)
pred_action_value
=
tf
.
reduce_sum
(
self
.
predict_value
*
action_onehot
,
1
)
#Nx1
max_pred_reward
=
tf
.
reduce_mean
(
tf
.
reduce_max
(
self
.
predict_value
,
1
),
name
=
'predict_reward'
)
...
...
@@ -128,7 +128,6 @@ def current_predictor(state):
def
play_one_episode
(
player
,
func
,
verbose
=
False
):
tot_reward
=
0
que
=
deque
(
maxlen
=
30
)
while
True
:
s
=
player
.
current_state
()
outputs
=
func
([[
s
]])
...
...
@@ -138,10 +137,6 @@ def play_one_episode(player, func, verbose=False):
print
action_value
,
act
if
random
.
random
()
<
0.01
:
act
=
random
.
choice
(
range
(
NUM_ACTIONS
))
if
len
(
que
)
==
que
.
maxlen
\
and
que
.
count
(
que
[
0
])
==
que
.
maxlen
:
act
=
1
# hack, avoid stuck
que
.
append
(
act
)
if
verbose
:
print
(
act
)
reward
,
isOver
=
player
.
action
(
act
)
...
...
@@ -150,7 +145,7 @@ def play_one_episode(player, func, verbose=False):
return
tot_reward
def
play_model
(
model_path
):
player
=
HistoryFramePlayer
(
get_player
(
0.01
),
FRAME_HISTORY
)
player
=
PreventStuckPlayer
(
HistoryFramePlayer
(
get_player
(
0.01
),
FRAME_HISTORY
),
30
,
1
)
cfg
=
PredictConfig
(
model
=
Model
(),
input_data_mapping
=
[
0
],
...
...
@@ -162,9 +157,8 @@ def play_model(model_path):
print
(
"Total:"
,
score
)
def
eval_model_multiprocess
(
model_path
):
M
=
Model
()
cfg
=
PredictConfig
(
model
=
M
,
model
=
M
odel
()
,
input_data_mapping
=
[
0
],
session_init
=
SaverRestore
(
model_path
),
output_var_names
=
[
'fct/output:0'
])
...
...
@@ -175,17 +169,16 @@ def eval_model_multiprocess(model_path):
self
.
outq
=
outqueue
def
run
(
self
):
player
=
HistoryFramePlayer
(
get_player
(),
FRAME_HISTORY
)
player
=
PreventStuckPlayer
(
HistoryFramePlayer
(
get_player
(),
FRAME_HISTORY
),
30
,
1
)
self
.
_init_runtime
()
while
True
:
score
=
play_one_episode
(
player
,
self
.
func
)
self
.
outq
.
put
(
score
)
NR_PROC
=
min
(
multiprocessing
.
cpu_count
()
//
2
,
10
)
procs
=
[]
NR_PROC
=
min
(
multiprocessing
.
cpu_count
()
//
2
,
8
)
q
=
multiprocessing
.
Queue
()
for
k
in
range
(
NR_PROC
):
procs
.
append
(
Worker
(
k
,
-
1
,
cfg
,
q
))
gpuid
=
get_gpus
()[
0
]
procs
=
[
Worker
(
k
,
gpuid
,
cfg
,
q
)
for
k
in
range
(
NR_PROC
)]
ensure_proc_terminate
(
procs
)
for
k
in
procs
:
k
.
start
()
...
...
@@ -202,8 +195,8 @@ class Evaluator(Callback):
def
_trigger_epoch
(
self
):
logger
.
info
(
"Evaluating..."
)
output
=
subproc_call
(
"
CUDA_VISIBLE_DEVICES=
{} --task eval --rom {} --load {}"
.
format
(
sys
.
argv
[
0
],
romfile
,
os
.
path
.
join
(
logger
.
LOG_DIR
,
'checkpoint'
)),
"{} --task eval --rom {} --load {}"
.
format
(
sys
.
argv
[
0
],
ROM_FILE
,
os
.
path
.
join
(
logger
.
LOG_DIR
,
'checkpoint'
)),
timeout
=
10
*
60
)
if
output
:
last
=
output
.
strip
()
.
split
(
'
\n
'
)[
-
1
]
...
...
@@ -246,6 +239,8 @@ def get_config():
dataset_train
,
PeriodicCallback
(
Evaluator
(),
2
),
]),
# save memory for multiprocess evaluator
session_config
=
get_default_sess_config
(
0.3
),
model
=
M
,
step_per_epoch
=
STEP_PER_EPOCH
,
)
...
...
tensorpack/predict/common.py
View file @
80723d31
...
...
@@ -78,7 +78,8 @@ def get_predict_func(config):
output_vars
=
[
tf
.
get_default_graph
()
.
get_tensor_by_name
(
get_op_var_name
(
n
)[
1
])
for
n
in
output_var_names
]
sess
=
tf
.
Session
()
# start with minimal memory, but allow growth
sess
=
tf
.
Session
(
config
=
get_default_sess_config
(
0.01
))
config
.
session_init
.
init
(
sess
)
def
run_input
(
dp
):
...
...
tensorpack/tfutils/symbolic_functions.py
View file @
80723d31
...
...
@@ -4,6 +4,7 @@
import
tensorflow
as
tf
import
numpy
as
np
from
..utils
import
logger
def
one_hot
(
y
,
num_labels
):
"""
...
...
@@ -11,15 +12,8 @@ def one_hot(y, num_labels):
:param num_labels: an int. number of output classes
:returns: an NxC onehot matrix.
"""
with
tf
.
op_scope
([
y
,
num_labels
],
'one_hot'
):
batch_size
=
tf
.
size
(
y
)
y
=
tf
.
expand_dims
(
y
,
1
)
indices
=
tf
.
expand_dims
(
tf
.
range
(
0
,
batch_size
),
1
)
concated
=
tf
.
concat
(
1
,
[
indices
,
y
])
onehot_labels
=
tf
.
sparse_to_dense
(
concated
,
tf
.
pack
([
batch_size
,
num_labels
]),
1.0
,
0.0
)
onehot_labels
.
set_shape
([
None
,
num_labels
])
return
tf
.
cast
(
onehot_labels
,
tf
.
float32
)
logger
.
warn
(
"symbf.one_hot is deprecated in favor of more general tf.one_hot"
)
return
tf
.
one_hot
(
y
,
num_labels
,
1.0
,
0.0
,
name
=
'one_hot'
)
def
prediction_incorrect
(
logits
,
label
,
topk
=
1
):
"""
...
...
tensorpack/utils/utils.py
View file @
80723d31
...
...
@@ -12,7 +12,7 @@ import numpy as np
from
.
import
logger
__all__
=
[
'timed_operation'
,
'change_env'
,
'get_rng'
,
'memoized'
,
'get_nr_gpu'
]
'get_rng'
,
'memoized'
,
'get_nr_gpu'
,
'get_gpus'
]
#def expand_dim_if_necessary(var, dp):
# """
...
...
@@ -83,5 +83,10 @@ def get_rng(self):
def
get_nr_gpu
():
env
=
os
.
environ
[
'CUDA_VISIBLE_DEVICES'
]
assert
env
is
not
None
assert
env
is
not
None
# TODO
return
len
(
env
.
split
(
','
))
def
get_gpus
():
env
=
os
.
environ
[
'CUDA_VISIBLE_DEVICES'
]
assert
env
is
not
None
# TODO
return
map
(
int
,
env
.
strip
()
.
split
(
','
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment