Commit cee79998 authored by Yuxin Wu's avatar Yuxin Wu

gym atari

parent aa124d20
......@@ -271,8 +271,9 @@ def get_config():
ClassificationError('wrong-top1', 'val-top1-error'),
ClassificationError('wrong-top5', 'val-top5-error')]),
ScheduledHyperParamSetter('learning_rate',
[(5, 0.03), (7, 0.01), (9, 0.006),
(15, 0.001), (20, 2e-4), (24, 6e-5)]),
[(5, 0.03), (9, 0.01), (12, 0.006),
(17, 0.003), (22, 1e-3), (36, 2e-4),
(41, 8e-5), (48, 1e-5), (53, 2e-6)]),
HumanHyperParamSetter('learning_rate')
]),
session_config=sess_config,
......
......@@ -7,10 +7,15 @@
Models are available for the following gym atari environments (click links for videos):
+ [Breakout-v0](https://gym.openai.com/evaluations/eval_L55gczPrQJamMGihq9tzA)
+ [AirRaid-v0](https://gym.openai.com/evaluations/eval_zIeNk5MxSGOmvGEUxrZDUw)
+ [Asterix-v0](https://gym.openai.com/evaluations/eval_mees2c58QfKm5GspCjRfCA)
+ [AirRaid-v0](https://gym.openai.com/evaluations/eval_zIeNk5MxSGOmvGEUxrZDUw) (renders with some flickering; cause unknown)
+ [Alien-v0](https://gym.openai.com/evaluations/eval_8NR1IvjTQkSIT6En4xSMA)
+ [Amidar-v0](https://gym.openai.com/evaluations/eval_HwEazbHtTYGpCialv9uPhA)
+ [Assault-v0](https://gym.openai.com/evaluations/eval_tCiHwy5QrSdFVucSbBV6Q)
+ [Asterix-v0](https://gym.openai.com/evaluations/eval_mees2c58QfKm5GspCjRfCA)
+ [Asteroids-v0](https://gym.openai.com/evaluations/eval_8eHKsRL4RzuZEq9AOLZA)
+ [Atlantis-v0](https://gym.openai.com/evaluations/eval_Z1B3d7A1QCaQk1HpO1Rg)
+ [Breakout-v0](https://gym.openai.com/evaluations/eval_L55gczPrQJamMGihq9tzA)
+ [Pong-v0](https://gym.openai.com/evaluations/eval_8L7SV59nSW6GGbbP3N4G6w)
+ [Seaquest-v0](https://gym.openai.com/evaluations/eval_N2624y3NSJWrOgoMSpOi4w)
Note that the Atari game settings in gym are quite different from those in the DeepMind papers, so the scores are not comparable.
......@@ -23,7 +23,7 @@ NUM_ACTIONS = None
ENV_NAME = None
def get_player(dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))
global NUM_ACTIONS
......@@ -32,10 +32,6 @@ def get_player(dumpdir=None):
pl = HistoryFramePlayer(pl, FRAME_HISTORY)
return pl
class MySimulatorWorker(SimulatorProcess):
    """Simulator worker process that constructs its own game player instance."""

    def _build_player(self):
        # NOTE(review): the get_player() shown earlier in this diff is declared
        # as get_player(dumpdir=None); calling it with train=True would raise a
        # TypeError unless the full (elided) signature accepts it — confirm
        # against the complete file.
        return get_player(train=True)
class Model(ModelDesc):
def _get_input_vars(self):
assert NUM_ACTIONS is not None
......@@ -80,7 +76,9 @@ def run_submission(cfg):
dirname = 'gym-submit'
player = get_player(dumpdir=dirname)
predfunc = get_predict_func(cfg)
for _ in range(100):
for k in range(10):
if k != 0:
player.restart_episode()
score = play_one_episode(player, predfunc)
print("Score:", score)
......
......@@ -25,7 +25,7 @@ class GymEnv(RLEnvironment):
"""
An OpenAI Gym wrapper. Auto-restarts finished episodes by default (controlled by ``auto_restart``).
"""
def __init__(self, name, dumpdir=None, viz=False):
def __init__(self, name, dumpdir=None, viz=False, auto_restart=True):
with _ALE_LOCK:
self.gymenv = gym.make(name)
if dumpdir:
......@@ -35,6 +35,7 @@ class GymEnv(RLEnvironment):
self.reset_stat()
self.rwd_counter = StatCounter()
self.restart_episode()
self.auto_restart = auto_restart
self.viz = viz
def restart_episode(self):
......@@ -55,7 +56,8 @@ class GymEnv(RLEnvironment):
self.rwd_counter.feed(r)
if isOver:
self.finish_episode()
self.restart_episode()
if self.auto_restart:
self.restart_episode()
return r, isOver
def get_action_space(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment