Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
6edb1f0d
Commit
6edb1f0d
authored
Aug 07, 2016
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add gym examples
parent
0d54d791
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
120 additions
and
0 deletions
+120
-0
examples/OpenAIGym/README.md
examples/OpenAIGym/README.md
+6
-0
examples/OpenAIGym/run-atari.py
examples/OpenAIGym/run-atari.py
+114
-0
No files found.
examples/OpenAIGym/README.md
0 → 100644
View file @
6edb1f0d
# Steps to reproduce:

1. Install [tensorpack](https://github.com/ppwwyyxx/tensorpack)
2. Download models from the [model zoo](https://drive.google.com/open?id=0B9IPQTvr2BBkS0VhX0xmS1c5aFk)
3. `ENV=NAME_OF_ENV ./run-atari.py --load "$ENV".tfmodel --env "$ENV"`
examples/OpenAIGym/run-atari.py
0 → 100755
View file @
6edb1f0d
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# File: run-atari.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import
numpy
as
np
import
tensorflow
as
tf
import
os
,
sys
,
re
,
time
import
random
import
argparse
import
six
from
tensorpack
import
*
from
tensorpack.RL
import
*
# Input-preprocessing / environment constants.
IMAGE_SIZE = (84, 84)        # (height, width) each frame is resized to
FRAME_HISTORY = 4            # number of consecutive frames stacked into one state
GAMMA = 0.99                 # discount factor (defined here but not used in this script)
CHANNEL = FRAME_HISTORY * 3  # stacked RGB frames -> total channel count
IMAGE_SHAPE3 = IMAGE_SIZE + (CHANNEL,)  # full state shape (H, W, C)

# Filled in at runtime: NUM_ACTIONS by get_player(), ENV_NAME in __main__.
NUM_ACTIONS = None
ENV_NAME = None
def get_player(viz=False, train=False, dumpdir=None):
    """Build the wrapped Gym player for the env named by ENV_NAME.

    Side effect: sets the module-global NUM_ACTIONS from the env's action
    space.  ``viz`` is accepted for interface compatibility but is not used
    in this script; ``dumpdir`` is forwarded to GymEnv for monitor output.
    """
    global NUM_ACTIONS

    def resize_state(frame):
        # cv2.resize takes dsize as (width, height), hence IMAGE_SIZE[::-1].
        # NOTE(review): cv2 is not imported explicitly in this file;
        # presumably it arrives via the tensorpack star-import -- confirm.
        return cv2.resize(frame, IMAGE_SIZE[::-1])

    player = MapPlayerState(GymEnv(ENV_NAME, dumpdir=dumpdir), resize_state)
    NUM_ACTIONS = player.get_action_space().num_actions()
    player = HistoryFramePlayer(player, FRAME_HISTORY)
    if not train:
        # Evaluation only: bail out of action loops that get the agent stuck.
        player = PreventStuckPlayer(player, 30, 1)
    return LimitLengthPlayer(player, 40000)
class MySimulatorWorker(SimulatorProcess):
    """Simulator process whose environment is built in training mode."""

    def _build_player(self):
        # train=True skips the PreventStuckPlayer wrapper (see get_player).
        return get_player(train=True)
class Model(ModelDesc):
    """Policy network: a small conv tower followed by two FC layers.

    Outputs per-action scores; _build_graph exposes their softmax under the
    tensor name 'logits'.
    """

    def _get_input_vars(self):
        # NUM_ACTIONS must already have been set by get_player().
        assert NUM_ACTIONS is not None
        return [
            InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
            InputVar(tf.int32, (None,), 'action'),
            InputVar(tf.float32, (None,), 'futurereward'),
        ]

    def _get_NN_prediction(self, image, is_training):
        """image: [0,255]; returns the pre-softmax policy head."""
        image = image / 255.0
        with argscope(Conv2D, nl=tf.nn.relu):
            l = image
            # (conv name, out_channel, kernel_shape, pool name or None)
            for conv_name, channels, kernel, pool_name in (
                    ('conv0', 32, 5, 'pool0'),
                    ('conv1', 32, 5, 'pool1'),
                    ('conv2', 64, 4, 'pool2'),
                    ('conv3', 64, 3, None)):
                l = Conv2D(conv_name, l, out_channel=channels,
                           kernel_shape=kernel)
                if pool_name is not None:
                    l = MaxPooling(pool_name, l, 2)
        l = FullyConnected('fc0', l, 512, nl=tf.identity)
        l = PReLU('prelu', l)
        return FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)

    def _build_graph(self, inputs, is_training):
        state, action, futurereward = inputs
        policy = self._get_NN_prediction(state, is_training)
        # NOTE: despite the name, 'logits' holds softmax probabilities.
        self.logits = tf.nn.softmax(policy, name='logits')

    def get_gradient_processor(self):
        clip = MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1))
        return [clip, SummaryGradient()]
def play_one_episode(player, func, verbose=False):
    """Run one episode with a mostly-greedy policy; return the mean reward.

    ``func`` is a batched predictor: called as func([[state]]) it returns
    per-action scores whose argmax is taken as the action.  With probability
    0.001 a random action is sampled instead (tiny exploration to break ties
    and loops).  If ``verbose``, each chosen action is printed.
    """
    def choose_action(state):
        action_space = player.get_action_space()
        scores = func([[state]])[0][0]
        action = scores.argmax()
        # Rare random action to avoid fully deterministic play.
        if random.random() < 0.001:
            action = action_space.sample()
        if verbose:
            print(action)
        return action

    return np.mean(player.play_one_episode(choose_action))
def run_submission(cfg):
    """Play 100 episodes with the trained model, recording to 'gym-submit'."""
    player = get_player(dumpdir='gym-submit')
    predictor = get_predict_func(cfg)
    for episode in range(100):
        score = play_one_episode(player, predictor)
        print("Score:", score)
if __name__ == '__main__':
    # Command-line interface: which model to load and which Atari env to run.
    ap = argparse.ArgumentParser()
    ap.add_argument('--gpu', help='comma separated list of GPU(s) to use.')  # nargs='*' in multi mode
    ap.add_argument('--load', help='load model', required=True)
    ap.add_argument('--env', help='env', required=True)
    args = ap.parse_args()

    ENV_NAME = args.env
    # Build and discard a player purely for its side effect of setting
    # NUM_ACTIONS (needed by Model._get_input_vars).
    p = get_player()
    del p
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    cfg = PredictConfig(
        model=Model(),
        session_init=SaverRestore(args.load),
        input_var_names=['state'],
        # 'logits:0' is the softmax output defined in Model._build_graph.
        output_var_names=['logits:0'])
    run_submission(cfg)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment