Commit 941093a9, authored Sep 17, 2016 by Yuxin Wu
Parent: be3409fb

    gym a3c

Showing 5 changed files with 269 additions and 17 deletions:

    examples/Atari2600/common.py               +4    -3
    examples/OpenAIGym/common.py               +1    -0
    examples/OpenAIGym/run-atari.py            +7    -13
    examples/OpenAIGym/train-atari.py          +256  -0
    tensorpack/dataflow/dataset/bsds500.py     +1    -1
examples/Atari2600/common.py

@@ -15,9 +15,9 @@ from tensorpack.utils.stat import *
 from tensorpack.callbacks import *
 
 global get_player
+get_player = None
 
 def play_one_episode(player, func, verbose=False):
-    # 0.01-greedy evaluation
     def f(s):
         spc = player.get_action_space()
         act = func([[s]])[0][0].argmax()

@@ -62,6 +62,7 @@ def eval_with_funcs(predict_funcs, nr_eval):
     for k in threads:
         k.start()
+        time.sleep(0.1)  # avoid simulator bugs
     stat = StatCounter()
     try:
         for _ in tqdm(range(nr_eval), **get_tqdm_kwargs()):

@@ -98,7 +99,7 @@ class Evaluator(Callback):
         t = time.time()
         mean, max = eval_with_funcs(self.pred_funcs, nr_eval=self.eval_episode)
         t = time.time() - t
-        if t > 8 * 60:  # eval takes too long
-            self.eval_episode = int(self.eval_episode * 0.89)
+        if t > 10 * 60:  # eval takes too long
+            self.eval_episode = int(self.eval_episode * 0.94)
         self.trainer.write_scalar_summary('mean_score', mean)
         self.trainer.write_scalar_summary('max_score', max)
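Note: the new module-level `get_player = None` turns this shared common.py into a pluggable helper. Each example script is expected to install its own environment factory before calling the evaluation utilities; the new train-atari.py in this commit does exactly this with `common.get_player = get_player`. A minimal sketch of the pattern, with `make_my_player` as a hypothetical factory name:

    # Sketch of the injection pattern this commit relies on (hypothetical factory).
    import common

    def make_my_player(viz=False, train=False, dumpdir=None):
        # ... construct and return a player/environment object here ...
        raise NotImplementedError

    # Evaluation helpers in common.py resolve the module-global get_player at
    # call time, so overriding the module attribute from outside is enough.
    common.get_player = make_my_player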
examples/OpenAIGym/common.py  (new symlink, mode 0 → 120000)

+../Atari2600/common.py
\ No newline at end of file
examples/OpenAIGym/run-atari.py

@@ -22,6 +22,8 @@ IMAGE_SHAPE3 = IMAGE_SIZE + (CHANNEL,)
 NUM_ACTIONS = None
 ENV_NAME = None
 
+from common import play_one_episode
+
 def get_player(dumpdir=None):
     pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
     pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))

@@ -40,7 +42,6 @@ class Model(ModelDesc):
                 InputVar(tf.float32, (None,), 'futurereward')]
 
     def _get_NN_prediction(self, image):
-        """ image: [0,255]"""
         image = image / 255.0
         with argscope(Conv2D, nl=tf.nn.relu):
             l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)

@@ -61,17 +62,6 @@ class Model(ModelDesc):
         policy = self._get_NN_prediction(state)
         self.logits = tf.nn.softmax(policy, name='logits')
 
-def play_one_episode(player, func, verbose=False):
-    def f(s):
-        spc = player.get_action_space()
-        act = func([[s]])[0][0].argmax()
-        if random.random() < 0.001:
-            act = spc.sample()
-        if verbose:
-            print(act)
-        return act
-    return np.mean(player.play_one_episode(f))
-
 def run_submission(cfg):
     dirname = 'gym-submit'
     player = get_player(dumpdir=dirname)

@@ -80,7 +70,11 @@ def run_submission(cfg):
         if k != 0:
             player.restart_episode()
         score = play_one_episode(player, predfunc)
-        print("Score:", score)
+        print("Total:", score)
+
+def do_submit():
+    dirname = 'gym-submit'
+    gym.upload(dirname, api_key='xxx')
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
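After this change run-atari.py imports play_one_episode from the shared common module instead of keeping its own copy. The removed local version picked the argmax action from the predictor and fell back to a random action with probability 0.001. A standalone sketch of that evaluation policy, with `policy_fn` and `action_space` as stand-ins for the tensorpack predict function and gym action space used above:

    import random

    def epsilon_greedy_act(policy_fn, state, action_space, eps=0.001):
        # Mirrors the removed helper: take the argmax action from the policy
        # output, but explore with a small probability eps.
        act = policy_fn([[state]])[0][0].argmax()
        if random.random() < eps:
            act = action_space.sample()
        return act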
examples/OpenAIGym/train-atari.py
0 → 100755
View file @
941093a9
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# File: train-atari.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import
numpy
as
np
import
tensorflow
as
tf
import
os
,
sys
,
re
,
time
import
random
import
uuid
import
argparse
import
multiprocessing
,
threading
from
collections
import
deque
import
six
from
six.moves
import
queue
from
tensorpack
import
*
from
tensorpack.utils
import
*
from
tensorpack.utils.concurrency
import
*
from
tensorpack.utils.serialize
import
*
from
tensorpack.utils.timer
import
*
from
tensorpack.utils.stat
import
*
from
tensorpack.tfutils
import
symbolic_functions
as
symbf
from
tensorpack.RL
import
*
import
common
from
common
import
(
play_model
,
Evaluator
,
eval_model_multithread
)
IMAGE_SIZE
=
(
84
,
84
)
FRAME_HISTORY
=
4
GAMMA
=
0.99
CHANNEL
=
FRAME_HISTORY
*
3
IMAGE_SHAPE3
=
IMAGE_SIZE
+
(
CHANNEL
,)
LOCAL_TIME_MAX
=
5
STEP_PER_EPOCH
=
6000
EVAL_EPISODE
=
50
BATCH_SIZE
=
128
SIMULATOR_PROC
=
50
PREDICTOR_THREAD_PER_GPU
=
2
PREDICTOR_THREAD
=
None
EVALUATE_PROC
=
min
(
multiprocessing
.
cpu_count
()
//
2
,
20
)
NUM_ACTIONS
=
None
ENV_NAME
=
None
def
get_player
(
viz
=
False
,
train
=
False
,
dumpdir
=
None
):
pl
=
GymEnv
(
ENV_NAME
,
dumpdir
=
dumpdir
)
def
func
(
img
):
return
cv2
.
resize
(
img
,
IMAGE_SIZE
[::
-
1
])
pl
=
MapPlayerState
(
pl
,
func
)
global
NUM_ACTIONS
NUM_ACTIONS
=
pl
.
get_action_space
()
.
num_actions
()
pl
=
HistoryFramePlayer
(
pl
,
FRAME_HISTORY
)
if
not
train
:
pl
=
PreventStuckPlayer
(
pl
,
30
,
1
)
pl
=
LimitLengthPlayer
(
pl
,
40000
)
return
pl
common
.
get_player
=
get_player
class
MySimulatorWorker
(
SimulatorProcess
):
def
_build_player
(
self
):
return
get_player
(
train
=
True
)
class
Model
(
ModelDesc
):
def
_get_input_vars
(
self
):
assert
NUM_ACTIONS
is
not
None
return
[
InputVar
(
tf
.
float32
,
(
None
,)
+
IMAGE_SHAPE3
,
'state'
),
InputVar
(
tf
.
int32
,
(
None
,),
'action'
),
InputVar
(
tf
.
float32
,
(
None
,),
'futurereward'
)
]
def
_get_NN_prediction
(
self
,
image
,
is_training
):
image
=
image
/
255.0
with
argscope
(
Conv2D
,
nl
=
tf
.
nn
.
relu
):
l
=
Conv2D
(
'conv0'
,
image
,
out_channel
=
32
,
kernel_shape
=
5
)
l
=
MaxPooling
(
'pool0'
,
l
,
2
)
l
=
Conv2D
(
'conv1'
,
l
,
out_channel
=
32
,
kernel_shape
=
5
)
l
=
MaxPooling
(
'pool1'
,
l
,
2
)
l
=
Conv2D
(
'conv2'
,
l
,
out_channel
=
64
,
kernel_shape
=
4
)
l
=
MaxPooling
(
'pool2'
,
l
,
2
)
l
=
Conv2D
(
'conv3'
,
l
,
out_channel
=
64
,
kernel_shape
=
3
)
l
=
FullyConnected
(
'fc0'
,
l
,
512
,
nl
=
tf
.
identity
)
l
=
PReLU
(
'prelu'
,
l
)
policy
=
FullyConnected
(
'fc-pi'
,
l
,
out_dim
=
NUM_ACTIONS
,
nl
=
tf
.
identity
)
value
=
FullyConnected
(
'fc-v'
,
l
,
1
,
nl
=
tf
.
identity
)
return
policy
,
value
def
_build_graph
(
self
,
inputs
,
is_training
):
state
,
action
,
futurereward
=
inputs
policy
,
self
.
value
=
self
.
_get_NN_prediction
(
state
,
is_training
)
self
.
value
=
tf
.
squeeze
(
self
.
value
,
[
1
],
name
=
'pred_value'
)
# (B,)
self
.
logits
=
tf
.
nn
.
softmax
(
policy
,
name
=
'logits'
)
expf
=
tf
.
get_variable
(
'explore_factor'
,
shape
=
[],
initializer
=
tf
.
constant_initializer
(
1
),
trainable
=
False
)
logitsT
=
tf
.
nn
.
softmax
(
policy
*
expf
,
name
=
'logitsT'
)
if
not
is_training
:
return
log_probs
=
tf
.
log
(
self
.
logits
+
1e-6
)
log_pi_a_given_s
=
tf
.
reduce_sum
(
log_probs
*
tf
.
one_hot
(
tf
.
cast
(
action
,
tf
.
int64
),
NUM_ACTIONS
,
1.0
,
0.0
),
1
)
advantage
=
tf
.
sub
(
tf
.
stop_gradient
(
self
.
value
),
futurereward
,
name
=
'advantage'
)
policy_loss
=
tf
.
reduce_sum
(
log_pi_a_given_s
*
advantage
,
name
=
'policy_loss'
)
xentropy_loss
=
tf
.
reduce_sum
(
self
.
logits
*
log_probs
,
name
=
'xentropy_loss'
)
value_loss
=
tf
.
nn
.
l2_loss
(
self
.
value
-
futurereward
,
name
=
'value_loss'
)
pred_reward
=
tf
.
reduce_mean
(
self
.
value
,
name
=
'predict_reward'
)
advantage
=
tf
.
sqrt
(
tf
.
reduce_mean
(
tf
.
square
(
advantage
)),
name
=
'rms_advantage'
)
summary
.
add_moving_summary
(
policy_loss
,
xentropy_loss
,
value_loss
,
pred_reward
,
advantage
)
entropy_beta
=
tf
.
get_variable
(
'entropy_beta'
,
shape
=
[],
initializer
=
tf
.
constant_initializer
(
0.01
),
trainable
=
False
)
self
.
cost
=
tf
.
add_n
([
policy_loss
,
xentropy_loss
*
entropy_beta
,
value_loss
])
self
.
cost
=
tf
.
truediv
(
self
.
cost
,
tf
.
cast
(
tf
.
shape
(
futurereward
)[
0
],
tf
.
float32
),
name
=
'cost'
)
def
get_gradient_processor
(
self
):
return
[
MapGradient
(
lambda
grad
:
tf
.
clip_by_average_norm
(
grad
,
0.1
)),
SummaryGradient
()]
class
MySimulatorMaster
(
SimulatorMaster
,
Callback
):
def
__init__
(
self
,
pipe_c2s
,
pipe_s2c
,
model
):
super
(
MySimulatorMaster
,
self
)
.
__init__
(
pipe_c2s
,
pipe_s2c
)
self
.
M
=
model
self
.
queue
=
queue
.
Queue
(
maxsize
=
BATCH_SIZE
*
8
*
2
)
def
_setup_graph
(
self
):
self
.
sess
=
self
.
trainer
.
sess
self
.
async_predictor
=
MultiThreadAsyncPredictor
(
self
.
trainer
.
get_predict_funcs
([
'state'
],
[
'logitsT'
,
'pred_value'
],
PREDICTOR_THREAD
),
batch_size
=
15
)
self
.
async_predictor
.
run
()
def
_on_state
(
self
,
state
,
ident
):
def
cb
(
outputs
):
distrib
,
value
=
outputs
.
result
()
assert
np
.
all
(
np
.
isfinite
(
distrib
))
action
=
np
.
random
.
choice
(
len
(
distrib
),
p
=
distrib
)
client
=
self
.
clients
[
ident
]
client
.
memory
.
append
(
TransitionExperience
(
state
,
action
,
None
,
value
=
value
))
self
.
send_queue
.
put
([
ident
,
dumps
(
action
)])
self
.
async_predictor
.
put_task
([
state
],
cb
)
def
_on_episode_over
(
self
,
ident
):
self
.
_parse_memory
(
0
,
ident
,
True
)
def
_on_datapoint
(
self
,
ident
):
client
=
self
.
clients
[
ident
]
if
len
(
client
.
memory
)
==
LOCAL_TIME_MAX
+
1
:
R
=
client
.
memory
[
-
1
]
.
value
self
.
_parse_memory
(
R
,
ident
,
False
)
def
_parse_memory
(
self
,
init_r
,
ident
,
isOver
):
client
=
self
.
clients
[
ident
]
mem
=
client
.
memory
if
not
isOver
:
last
=
mem
[
-
1
]
mem
=
mem
[:
-
1
]
mem
.
reverse
()
R
=
float
(
init_r
)
for
idx
,
k
in
enumerate
(
mem
):
R
=
np
.
clip
(
k
.
reward
,
-
1
,
1
)
+
GAMMA
*
R
self
.
queue
.
put
([
k
.
state
,
k
.
action
,
R
])
if
not
isOver
:
client
.
memory
=
[
last
]
else
:
client
.
memory
=
[]
def
get_config
():
logger
.
auto_set_dir
()
M
=
Model
()
name_base
=
str
(
uuid
.
uuid1
())[:
6
]
PIPE_DIR
=
os
.
environ
.
get
(
'TENSORPACK_PIPEDIR'
,
'.'
)
.
rstrip
(
'/'
)
namec2s
=
'ipc://{}/sim-c2s-{}'
.
format
(
PIPE_DIR
,
name_base
)
names2c
=
'ipc://{}/sim-s2c-{}'
.
format
(
PIPE_DIR
,
name_base
)
procs
=
[
MySimulatorWorker
(
k
,
namec2s
,
names2c
)
for
k
in
range
(
SIMULATOR_PROC
)]
ensure_proc_terminate
(
procs
)
start_proc_mask_signal
(
procs
)
master
=
MySimulatorMaster
(
namec2s
,
names2c
,
M
)
dataflow
=
BatchData
(
DataFromQueue
(
master
.
queue
),
BATCH_SIZE
)
lr
=
tf
.
Variable
(
0.001
,
trainable
=
False
,
name
=
'learning_rate'
)
tf
.
scalar_summary
(
'learning_rate'
,
lr
)
return
TrainConfig
(
dataset
=
dataflow
,
optimizer
=
tf
.
train
.
AdamOptimizer
(
lr
,
epsilon
=
1e-3
),
callbacks
=
Callbacks
([
StatPrinter
(),
ModelSaver
(),
ScheduledHyperParamSetter
(
'learning_rate'
,
[(
80
,
0.0003
),
(
120
,
0.0001
)]),
ScheduledHyperParamSetter
(
'entropy_beta'
,
[(
80
,
0.005
)]),
ScheduledHyperParamSetter
(
'explore_factor'
,
[(
80
,
2
),
(
100
,
3
),
(
120
,
4
),
(
140
,
5
)]),
HumanHyperParamSetter
(
'learning_rate'
),
HumanHyperParamSetter
(
'entropy_beta'
),
HumanHyperParamSetter
(
'explore_factor'
),
master
,
PeriodicCallback
(
Evaluator
(
EVAL_EPISODE
,
[
'state'
],
[
'logits'
]),
2
),
]),
extra_threads_procs
=
[
master
],
session_config
=
get_default_sess_config
(
0.5
),
model
=
M
,
step_per_epoch
=
STEP_PER_EPOCH
,
max_epoch
=
1000
,
)
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--gpu'
,
help
=
'comma separated list of GPU(s) to use.'
)
# nargs='*' in multi mode
parser
.
add_argument
(
'--load'
,
help
=
'load model'
)
parser
.
add_argument
(
'--env'
,
help
=
'env'
,
required
=
True
)
parser
.
add_argument
(
'--task'
,
help
=
'task to perform'
,
choices
=
[
'play'
,
'eval'
,
'train'
],
default
=
'train'
)
args
=
parser
.
parse_args
()
ENV_NAME
=
args
.
env
p
=
get_player
();
del
p
# set NUM_ACTIONS
if
args
.
gpu
:
os
.
environ
[
'CUDA_VISIBLE_DEVICES'
]
=
args
.
gpu
if
args
.
task
!=
'train'
:
assert
args
.
load
is
not
None
if
args
.
task
!=
'train'
:
cfg
=
PredictConfig
(
model
=
Model
(),
session_init
=
SaverRestore
(
args
.
load
),
input_var_names
=
[
'state'
],
output_var_names
=
[
'logits:0'
])
if
args
.
task
==
'play'
:
play_model
(
cfg
)
elif
args
.
task
==
'eval'
:
eval_model_multithread
(
cfg
,
EVAL_EPISODE
)
else
:
nr_gpu
=
get_nr_gpu
()
if
nr_gpu
>
1
:
predict_tower
=
range
(
nr_gpu
)[
-
nr_gpu
/
2
:]
else
:
predict_tower
=
[
0
]
PREDICTOR_THREAD
=
len
(
predict_tower
)
*
PREDICTOR_THREAD_PER_GPU
config
=
get_config
()
if
args
.
load
:
config
.
session_init
=
SaverRestore
(
args
.
load
)
config
.
tower
=
range
(
nr_gpu
)[:
-
nr_gpu
/
2
]
or
[
0
]
logger
.
info
(
"[BA3C] Train on gpu {} and infer on gpu {}"
.
format
(
','
.
join
(
map
(
str
,
config
.
tower
)),
','
.
join
(
map
(
str
,
predict_tower
))))
AsyncMultiGPUTrainer
(
config
,
predict_tower
=
predict_tower
)
.
train
()
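MySimulatorMaster._parse_memory turns each client's buffered transitions into training tuples by walking the memory backwards and accumulating a clipped, discounted return, seeded with the value prediction of the newest transition when the episode is not yet over (and with 0 when it is). A small self-contained sketch of that computation, numpy only, with `rewards` and `bootstrap_value` as made-up inputs:

    import numpy as np

    GAMMA = 0.99

    def discounted_returns(rewards, bootstrap_value):
        # Replicates the backward pass in _parse_memory: each reward is clipped
        # to [-1, 1] and R_t = clip(r_t) + GAMMA * R_{t+1}, seeded with the
        # bootstrap value.
        R = float(bootstrap_value)
        out = []
        for r in reversed(rewards):
            R = np.clip(r, -1, 1) + GAMMA * R
            out.append(R)
        return list(reversed(out))

    # Example: 3 steps of reward 1 bootstrapped from a predicted value of 0.5
    # gives roughly [3.455, 2.480, 1.495] (oldest step first).
    print(discounted_returns([1.0, 1.0, 1.0], 0.5))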
tensorpack/dataflow/dataset/bsds500.py

@@ -73,7 +73,7 @@ class BSDS500(RNGDataFlow):
            gt = loadmat(gt_file)['groundTruth'][0]
            n_annot = gt.shape[0]
            gt = sum(gt[k]['Boundaries'][0][0] for k in range(n_annot))
-           gt[gt <= 2] = 0
+           #gt[gt <= 2] = 0
            gt = gt.astype('float32')
            gt /= np.max(gt)
            if gt.shape[0] > gt.shape[1]:
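With the thresholding line commented out, the BSDS500 ground truth becomes a soft boundary map: pixels marked by only one or two annotators are no longer zeroed out before the map is normalized by its maximum. A small numpy sketch of the effect, using made-up per-pixel annotator counts:

    import numpy as np

    # Per-pixel count of annotators who marked a boundary (hypothetical values).
    gt = np.array([0., 1., 2., 3., 5.])

    old = gt.copy()
    old[old <= 2] = 0         # behaviour before this commit
    old = old / np.max(old)   # -> [0. , 0. , 0. , 0.6, 1. ]

    new = gt / np.max(gt)     # behaviour after: soft map [0. , 0.2, 0.4, 0.6, 1. ]
    print(old, new)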