Shashank Suhas / seminar-breakout / Commits

Commit 66db04fe authored May 03, 2017 by Yuxin Wu

merge gym scripts

parent 7956bcdc

Showing 3 changed files with 31 additions and 122 deletions:

  +7   −3    examples/A3C-Gym/README.md
  +0   −109  examples/A3C-Gym/run-atari.py (deleted)
  +24  −10   examples/A3C-Gym/train-atari.py
examples/A3C-Gym/README.md

...
@@ -8,7 +8,7 @@ Most of them are the best reproducible results on gym.

 ### To train on an Atari game:

-`CUDA_VISIBLE_DEVICES=0 ./train-atari.py --env Breakout-v0`
+`./train-atari.py --env Breakout-v0 --gpu 0`

 The speed is about 6~10 iterations/s on 1 GPU plus 12+ CPU cores.
 In each iteration it trains on a batch of 128 new states. The network architecture is larger than what's used in the original paper.
...
@@ -25,10 +25,14 @@ Some practical notes:

    multiprocess Python program to get a cgroup dedicated for the task.
 3. Training at a significantly slower speed (e.g. on CPU) will result in a very bad score, probably because of async issues.

-### To run a pretrained Atari model for 100 episodes:
+### To watch the agent play (need GUI):
+`./train-atari.py --task play --env Breakout-v0 --load Breakout-v0.tfmodel`
+
+### To generate gym submission with a pretrained Atari model:
 1. Download models from [model zoo](https://goo.gl/9yIol2)
-2. `ENV=Breakout-v0; ./run-atari.py --load "$ENV".tfmodel --env "$ENV" --episode 100 --output output_dir`
+2. `./train-atari.py --task gen_submit --load Breakout-v0.tfmodel --env Breakout-v0 --output output_dir`
Models are available for the following atari environments (click to watch videos of my agent):
...
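Note that the `gen_submit` task only records episodes into the `--output` directory; the actual upload is left commented out in the script (`# gym.upload(output, api_key='xxx')`). As a sketch of that final step, using the old `gym.upload` API that existed when this commit was made (the gym scoreboard has since been retired, and the API key below is a placeholder):

```python
import gym

# Upload the monitor output recorded by `--task gen_submit` to the
# OpenAI Gym scoreboard (gym <= 0.9 only; the key is a placeholder).
gym.upload('output_dir', api_key='YOUR_API_KEY')
```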
examples/A3C-Gym/run-atari.py  deleted  100755 → 0

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: run-atari.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>

import numpy as np
import os
import sys
import re
import time
import random
import argparse
import six
import cv2
import tensorflow as tf
import gym   # needed by do_submit() below

from tensorpack import *
from tensorpack.RL import *

from common import play_one_episode

IMAGE_SIZE = (84, 84)
FRAME_HISTORY = 4
CHANNEL = FRAME_HISTORY * 3
IMAGE_SHAPE3 = IMAGE_SIZE + (CHANNEL,)
NUM_ACTIONS = None
ENV_NAME = None


def get_player(dumpdir=None):
    # gym env -> resize frames to 84x84 -> stack the last 4 frames
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
    pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    pl = HistoryFramePlayer(pl, FRAME_HISTORY)
    return pl


class Model(ModelDesc):
    def _get_inputs(self):
        assert NUM_ACTIONS is not None
        return [InputDesc(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
                InputDesc(tf.int32, (None,), 'action'),
                InputDesc(tf.float32, (None,), 'futurereward')]

    def _get_NN_prediction(self, image):
        image = image / 255.0
        with argscope(Conv2D, nl=tf.nn.relu):
            l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
            l = MaxPooling('pool0', l, 2)
            l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
            l = MaxPooling('pool1', l, 2)
            l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
            l = MaxPooling('pool2', l, 2)
            l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)

        l = FullyConnected('fc0', l, 512, nl=tf.identity)
        l = PReLU('prelu', l)
        policy = FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)
        return policy

    def _build_graph(self, inputs):
        state, action, futurereward = inputs
        policy = self._get_NN_prediction(state)
        policy = tf.nn.softmax(policy, name='policy')


def run_submission(cfg, output, nr):
    player = get_player(dumpdir=output)
    predfunc = OfflinePredictor(cfg)
    logger.info("Start evaluation: ")
    for k in range(nr):
        if k != 0:
            player.restart_episode()
        score = play_one_episode(player, predfunc)
        print("Score:", score)


def do_submit(output):
    gym.upload(output, api_key='xxx')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
    parser.add_argument('--load', help='load model', required=True)
    parser.add_argument('--env', help='environment name', required=True)
    parser.add_argument('--episode', help='number of episodes to run', type=int, default=100)
    parser.add_argument('--output', help='output directory', default='gym-submit')
    args = parser.parse_args()

    ENV_NAME = args.env
    assert ENV_NAME
    logger.info("Environment Name: {}".format(ENV_NAME))
    p = get_player()
    del p    # instantiated once only to set NUM_ACTIONS

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    cfg = PredictConfig(
        model=Model(),
        session_init=SaverRestore(args.load),
        input_names=['state'],
        output_names=['policy'])
    run_submission(cfg, args.output, args.episode)
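For context, `play_one_episode` is imported from the example's `common.py`, which this diff doesn't show. A minimal sketch of what such a helper plausibly does with this predictor (the greedy `argmax` action choice and the exact call shapes are assumptions, not the repo's exact code):

```python
import numpy as np

def play_one_episode_sketch(player, predfunc):
    """Illustrative only: run the wrapped player to episode end,
    picking actions from the 'policy' softmax output."""
    total_reward = 0.0
    is_over = False
    while not is_over:
        state = player.current_state()        # (84, 84, 12): 4 stacked RGB frames
        policy = predfunc([[state]])[0][0]    # batch of 1 -> distribution over actions
        action = np.argmax(policy)            # greedy choice (assumption)
        reward, is_over = player.action(action)
        total_reward += reward
    return total_reward
```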
examples/A3C-Gym/train-atari.py

...
@@ -29,7 +29,7 @@ from tensorpack.tfutils.gradproc import MapGradient, SummaryGradient
 from tensorpack.RL import *
 from simulator import *
 import common
-from common import (play_model, Evaluator, eval_model_multithread)
+from common import (play_model, Evaluator, eval_model_multithread, play_one_episode)

 IMAGE_SIZE = (84, 84)
 FRAME_HISTORY = 4
...
@@ -51,11 +51,8 @@ ENV_NAME = None
 def get_player(viz=False, train=False, dumpdir=None):
-    pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
-
-    def func(img):
-        return cv2.resize(img, IMAGE_SIZE[::-1])
-    pl = MapPlayerState(pl, func)
+    pl = GymEnv(ENV_NAME, viz=viz, dumpdir=dumpdir)
+    pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))

     global NUM_ACTIONS
     NUM_ACTIONS = pl.get_action_space().num_actions()
...
@@ -63,6 +60,7 @@ def get_player(viz=False, train=False, dumpdir=None):
     pl = HistoryFramePlayer(pl, FRAME_HISTORY)
     if not train:
         pl = PreventStuckPlayer(pl, 30, 1)
+    else:
+        pl = LimitLengthPlayer(pl, 40000)
     return pl
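A small aside on the `IMAGE_SIZE[::-1]` in the state-mapping lambda: `IMAGE_SIZE` is `(84, 84)` in (height, width) order, while `cv2.resize` expects its target size as (width, height), hence the reversal (a no-op for a square target, but it keeps the code correct for non-square sizes). A quick self-contained check, with the 210×160 dummy frame standing in for a raw Atari observation:

```python
import cv2
import numpy as np

IMAGE_SIZE = (84, 84)                            # (height, width)
frame = np.zeros((210, 160, 3), dtype=np.uint8)  # stand-in for a raw Atari RGB frame
small = cv2.resize(frame, IMAGE_SIZE[::-1])      # cv2.resize takes (width, height)
assert small.shape == (84, 84, 3)
```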
...
@@ -71,7 +69,6 @@ common.get_player = get_player
 class MySimulatorWorker(SimulatorProcess):
     def _build_player(self):
         return get_player(train=True)
...
@@ -232,17 +229,32 @@ def get_config():
     )


+def run_submission(cfg, output, nr):
+    player = get_player(train=False, dumpdir=output)
+    predfunc = OfflinePredictor(cfg)
+    logger.info("Start evaluation: ")
+    for k in range(nr):
+        if k != 0:
+            player.restart_episode()
+        score = play_one_episode(player, predfunc)
+        print("Score:", score)
+    # gym.upload(output, api_key='xxx')
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
     parser.add_argument('--load', help='load model')
     parser.add_argument('--env', help='env', required=True)
-    parser.add_argument('--task', help='task to perform',
-                        choices=['play', 'eval', 'train'], default='train')
+    parser.add_argument('--task', help='task to perform',
+                        choices=['play', 'eval', 'train', 'gen_submit'], default='train')
+    parser.add_argument('--output', help='output directory for submission', default='output_dir')
+    parser.add_argument('--episode', help='number of episodes to eval', default=100, type=int)
     args = parser.parse_args()

     ENV_NAME = args.env
     assert ENV_NAME
     logger.info("Environment Name: {}".format(ENV_NAME))
     p = get_player()
     del p    # set NUM_ACTIONS
...
@@ -260,7 +272,9 @@ if __name__ == '__main__':
     if args.task == 'play':
         play_model(cfg)
     elif args.task == 'eval':
-        eval_model_multithread(cfg, EVAL_EPISODE)
+        eval_model_multithread(cfg, args.episode)
+    elif args.task == 'gen_submit':
+        run_submission(cfg, args.output, args.episode)
     else:
         nr_gpu = get_nr_gpu()
         if nr_gpu > 0:
...
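On the README change from `CUDA_VISIBLE_DEVICES=0 ./train-atari.py ...` to `./train-atari.py ... --gpu 0`: the two are equivalent because the script exports the variable itself, as the deleted run-atari.py did above. train-atari.py's own `--gpu` handling sits outside the hunks shown here, so take this as a sketch of the pattern rather than its exact code:

```python
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
args = parser.parse_args()

if args.gpu:
    # Equivalent to launching with CUDA_VISIBLE_DEVICES=<gpus>, provided
    # this runs before TensorFlow enumerates devices.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
```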