Commit 6edb1f0d authored by Yuxin Wu's avatar Yuxin Wu

add gym examples

parent 0d54d791
# Steps to reproduce:
1. install [tensorpack](https://github.com/ppwwyyxx/tensorpack)
2. Download models from [model zoo](https://drive.google.com/open?id=0B9IPQTvr2BBkS0VhX0xmS1c5aFk)
3. `ENV=NAME_OF_ENV ./run-atari.py --load "$ENV".tfmodel --env "$ENV"`
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# File: run-atari.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import numpy as np
import tensorflow as tf
import os, sys, re, time
import random
import argparse
import six
from tensorpack import *
from tensorpack.RL import *
IMAGE_SIZE = (84, 84)
FRAME_HISTORY = 4
GAMMA = 0.99
CHANNEL = FRAME_HISTORY * 3
IMAGE_SHAPE3 = IMAGE_SIZE + (CHANNEL,)
NUM_ACTIONS = None
ENV_NAME = None
def get_player(viz=False, train=False, dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
def func(img):
return cv2.resize(img, IMAGE_SIZE[::-1])
pl = MapPlayerState(pl, func)
global NUM_ACTIONS
NUM_ACTIONS = pl.get_action_space().num_actions()
pl = HistoryFramePlayer(pl, FRAME_HISTORY)
if not train:
pl = PreventStuckPlayer(pl, 30, 1)
pl = LimitLengthPlayer(pl, 40000)
return pl
class MySimulatorWorker(SimulatorProcess):
def _build_player(self):
return get_player(train=True)
class Model(ModelDesc):
def _get_input_vars(self):
assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int32, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ]
def _get_NN_prediction(self, image, is_training):
""" image: [0,255]"""
image = image / 255.0
with argscope(Conv2D, nl=tf.nn.relu):
l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
l = MaxPooling('pool0', l, 2)
l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
l = MaxPooling('pool1', l, 2)
l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
l = MaxPooling('pool2', l, 2)
l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
l = FullyConnected('fc0', l, 512, nl=tf.identity)
l = PReLU('prelu', l)
policy = FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)
return policy
def _build_graph(self, inputs, is_training):
state, action, futurereward = inputs
policy = self._get_NN_prediction(state, is_training)
self.logits = tf.nn.softmax(policy, name='logits')
def get_gradient_processor(self):
return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
SummaryGradient()]
def play_one_episode(player, func, verbose=False):
def f(s):
spc = player.get_action_space()
act = func([[s]])[0][0].argmax()
if random.random() < 0.001:
act = spc.sample()
if verbose:
print(act)
return act
return np.mean(player.play_one_episode(f))
def run_submission(cfg):
dirname = 'gym-submit'
player = get_player(dumpdir=dirname)
predfunc = get_predict_func(cfg)
for _ in range(100):
score = play_one_episode(player, predfunc)
print("Score:", score)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') # nargs='*' in multi mode
parser.add_argument('--load', help='load model', required=True)
parser.add_argument('--env', help='env', required=True)
args = parser.parse_args()
ENV_NAME = args.env
p = get_player(); del p # set NUM_ACTIONS
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
cfg = PredictConfig(
model=Model(),
session_init=SaverRestore(args.load),
input_var_names=['state'],
output_var_names=['logits:0'])
run_submission(cfg)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment