Commit 66db04fe authored by Yuxin Wu

merge gym scripts

parent 7956bcdc
......@@ -8,7 +8,7 @@ Most of them are the best reproducible results on gym.
### To train on an Atari game:
`CUDA_VISIBLE_DEVICES=0 ./train-atari.py --env Breakout-v0`
`./train-atari.py --env Breakout-v0 --gpu 0`
The speed is about 6~10 iterations/s on 1 GPU plus 12+ CPU cores.
In each iteration it trains on a batch of 128 new states. The network architecture is larger than what's used in the original paper.
......@@ -25,10 +25,14 @@ Some practical notes:
multiprocess Python program to get a cgroup dedicated for the task.
3. Training at a significantly slower speed (e.g. on CPU) will result in a very bad score, probably because of async issues.
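For note 2, a minimal, Linux-only sketch of dedicating CPU cores to the trainer from inside Python (`os.sched_setaffinity` is standard library; treating it as a stand-in for a real cgroup is this sketch's assumption):

```python
import os

# Pin this process (and workers forked afterwards) to cores 0-11. Unlike a
# cgroup/cpuset, this does not keep other processes off those cores.
os.sched_setaffinity(0, set(range(12)))
```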
### To run a pretrained Atari model for 100 episodes:
### To watch the agent play (needs a GUI):
`./train-atari.py --task play --env Breakout-v0 --load Breakout-v0.tfmodel`
### To generate gym submission with a pretrained Atari model:
1. Download models from [model zoo](https://goo.gl/9yIol2)
2. `ENV=Breakout-v0; ./run-atari.py --load "$ENV".tfmodel --env "$ENV" --episode 100 --output output_dir`
2. `./train-atari.py --task gen_submit --load Breakout-v0.tfmodel --env Breakout-v0 --output output_dir`
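Step 2 records episodes into `output_dir`; they can then be uploaded with the gym API of that era, as the `do_submit` helper below does (`'xxx'` is a placeholder API key, and `gym.upload` has since been removed from gym):

```python
import gym

# Upload the recorded episodes to the (now retired) OpenAI Gym scoreboard.
gym.upload('output_dir', api_key='xxx')
```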
Models are available for the following Atari environments (click to watch videos of my agent):
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: run-atari.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import numpy as np
import os
import sys
import re
import time
import random
import argparse
import six
import cv2
import gym  # used by do_submit() for uploading results
import tensorflow as tf
from tensorpack import *
from tensorpack.RL import *
from common import play_one_episode
IMAGE_SIZE = (84, 84)
FRAME_HISTORY = 4
CHANNEL = FRAME_HISTORY * 3  # 4 stacked RGB frames -> 12 input channels
IMAGE_SHAPE3 = IMAGE_SIZE + (CHANNEL,)  # (84, 84, 12)
NUM_ACTIONS = None
ENV_NAME = None
def get_player(dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))
global NUM_ACTIONS
NUM_ACTIONS = pl.get_action_space().num_actions()
pl = HistoryFramePlayer(pl, FRAME_HISTORY)
return pl
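# Illustration (not part of this script): the wrappers above fix the network's
# input shape -- each raw frame is resized to 84x84, and HistoryFramePlayer
# stacks the last 4 RGB frames along the channel axis. stack_history is a
# hypothetical helper sketching the combined effect:
def stack_history(frames):
    # Resize the last FRAME_HISTORY raw RGB frames and concatenate them along
    # the channel axis, mimicking MapPlayerState + HistoryFramePlayer together.
    resized = [cv2.resize(f, IMAGE_SIZE[::-1]) for f in frames[-FRAME_HISTORY:]]
    return np.concatenate(resized, axis=-1)  # shape (84, 84, 12) == IMAGE_SHAPE3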
class Model(ModelDesc):
def _get_inputs(self):
assert NUM_ACTIONS is not None
return [InputDesc(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputDesc(tf.int32, (None,), 'action'),
InputDesc(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image):
image = image / 255.0
with argscope(Conv2D, nl=tf.nn.relu):
l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
l = MaxPooling('pool0', l, 2)
l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
l = MaxPooling('pool1', l, 2)
l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
l = MaxPooling('pool2', l, 2)
l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
l = FullyConnected('fc0', l, 512, nl=tf.identity)
l = PReLU('prelu', l)
policy = FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)
return policy
def _build_graph(self, inputs):
state, action, futurereward = inputs
policy = self._get_NN_prediction(state)
policy = tf.nn.softmax(policy, name='policy')
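# Illustration (not part of this script): 'policy' is a softmax distribution
# over NUM_ACTIONS. Action selection happens in common.play_one_episode, which
# is outside this diff; sampling from the distribution is one plausible choice:
def sample_action(logits):
    z = np.exp(logits - logits.max())  # numerically stable softmax
    probs = z / z.sum()
    return np.random.choice(len(probs), p=probs)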
def run_submission(cfg, output, nr):
player = get_player(dumpdir=output)
predfunc = OfflinePredictor(cfg)
logger.info("Start evaluation: ")
for k in range(nr):
if k != 0:
player.restart_episode()
score = play_one_episode(player, predfunc)
print("Score:", score)
def do_submit(output):
gym.upload(output, api_key='xxx')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('--load', help='load model', required=True)
parser.add_argument('--env', help='environment name', required=True)
parser.add_argument('--episode', help='number of episodes to run',
type=int, default=100)
parser.add_argument('--output', help='output directory', default='gym-submit')
args = parser.parse_args()
ENV_NAME = args.env
assert ENV_NAME
logger.info("Environment Name: {}".format(ENV_NAME))
p = get_player()
del p  # the player above is created only to initialize NUM_ACTIONS
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
cfg = PredictConfig(
model=Model(),
session_init=SaverRestore(args.load),
input_names=['state'],
output_names=['policy'])
run_submission(cfg, args.output, args.episode)
......@@ -29,7 +29,7 @@ from tensorpack.tfutils.gradproc import MapGradient, SummaryGradient
from tensorpack.RL import *
from simulator import *
import common
from common import (play_model, Evaluator, eval_model_multithread)
from common import (play_model, Evaluator, eval_model_multithread, play_one_episode)
IMAGE_SIZE = (84, 84)
FRAME_HISTORY = 4
......@@ -51,11 +51,8 @@ ENV_NAME = None
def get_player(viz=False, train=False, dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
def func(img):
return cv2.resize(img, IMAGE_SIZE[::-1])
pl = MapPlayerState(pl, func)
pl = GymEnv(ENV_NAME, viz=viz, dumpdir=dumpdir)
pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))
global NUM_ACTIONS
NUM_ACTIONS = pl.get_action_space().num_actions()
......@@ -63,7 +60,8 @@ def get_player(viz=False, train=False, dumpdir=None):
pl = HistoryFramePlayer(pl, FRAME_HISTORY)
if not train:
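        # assumption: PreventStuckPlayer injects action 1 (FIRE in Breakout-style
        # games) after 30 repeated identical actions, so eval episodes don't stall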
pl = PreventStuckPlayer(pl, 30, 1)
pl = LimitLengthPlayer(pl, 40000)
else:
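        # cap training episodes at 40000 steps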
pl = LimitLengthPlayer(pl, 40000)
return pl
......@@ -71,7 +69,6 @@ common.get_player = get_player
class MySimulatorWorker(SimulatorProcess):
def _build_player(self):
return get_player(train=True)
......@@ -232,17 +229,32 @@ def get_config():
)
def run_submission(cfg, output, nr):
player = get_player(train=False, dumpdir=output)
predfunc = OfflinePredictor(cfg)
logger.info("Start evaluation: ")
for k in range(nr):
if k != 0:
player.restart_episode()
score = play_one_episode(player, predfunc)
print("Score:", score)
# gym.upload(output, api_key='xxx')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('--load', help='load model')
parser.add_argument('--env', help='environment name', required=True)
parser.add_argument('--task', help='task to perform',
choices=['play', 'eval', 'train'], default='train')
choices=['play', 'eval', 'train', 'gen_submit'], default='train')
parser.add_argument('--output', help='output directory for submission', default='output_dir')
parser.add_argument('--episode', help='number of episodes to evaluate', default=100, type=int)
args = parser.parse_args()
ENV_NAME = args.env
assert ENV_NAME
logger.info("Environment Name: {}".format(ENV_NAME))
p = get_player()
del p  # the player above is created only to initialize NUM_ACTIONS
......@@ -260,7 +272,9 @@ if __name__ == '__main__':
if args.task == 'play':
play_model(cfg)
elif args.task == 'eval':
eval_model_multithread(cfg, EVAL_EPISODE)
eval_model_multithread(cfg, args.episode)
elif args.task == 'gen_submit':
run_submission(cfg, args.output, args.episode)
else:
nr_gpu = get_nr_gpu()
if nr_gpu > 0:
......