gym atari

cee79998 · Yuxin Wu · aa124d20 · cee79998 · cee79998 · cee79998
Commit cee79998 authored Aug 15, 2016 by Yuxin Wu
4 changed files
--- a/examples/Inception/inceptionv3.py
+++ b/examples/Inception/inceptionv3.py
@@ -271,8 +271,9 @@ def get_config():
                ClassificationError('wrong-top1', 'val-top1-error'),
                ClassificationError('wrong-top5', 'val-top5-error')]),
            ScheduledHyperParamSetter('learning_rate',
-                                      [(5, 0.03), (7, 0.01), (9, 0.006),
+                                      [(5, 0.03), (9, 0.01), (12, 0.006),
-                                       (15, 0.001), (20, 2e-4), (24, 6e-5)]),
+                                       (17, 0.003), (22, 1e-3), (36, 2e-4),
+                                       (41, 8e-5), (48, 1e-5), (53, 2e-6)]),
            HumanHyperParamSetter('learning_rate')
        ]),
        session_config=sess_config,

--- a/examples/OpenAIGym/README.md
+++ b/examples/OpenAIGym/README.md
@@ -7,10 +7,15 @@
 Models are available for the following gym atari environments (click links for videos):
-+ [Breakout-v0](https://gym.openai.com/evaluations/eval_L55gczPrQJamMGihq9tzA)
+ [AirRaid-v0](https://gym.openai.com/evaluations/eval_zIeNk5MxSGOmvGEUxrZDUw) (the video flickers a bit; the cause is unknown)
-+ [AirRaid-v0](https://gym.openai.com/evaluations/eval_zIeNk5MxSGOmvGEUxrZDUw)
+ [Alien-v0](https://gym.openai.com/evaluations/eval_8NR1IvjTQkSIT6En4xSMA)
-+ [Asterix-v0](https://gym.openai.com/evaluations/eval_mees2c58QfKm5GspCjRfCA)
 + [Amidar-v0](https://gym.openai.com/evaluations/eval_HwEazbHtTYGpCialv9uPhA)
+ [Assault-v0](https://gym.openai.com/evaluations/eval_tCiHwy5QrSdFVucSbBV6Q)
+ [Asterix-v0](https://gym.openai.com/evaluations/eval_mees2c58QfKm5GspCjRfCA)
+ [Asteroids-v0](https://gym.openai.com/evaluations/eval_8eHKsRL4RzuZEq9AOLZA)
+ [Atlantis-v0](https://gym.openai.com/evaluations/eval_Z1B3d7A1QCaQk1HpO1Rg)
+ [Breakout-v0](https://gym.openai.com/evaluations/eval_L55gczPrQJamMGihq9tzA)
+ [Pong-v0](https://gym.openai.com/evaluations/eval_8L7SV59nSW6GGbbP3N4G6w)
 + [Seaquest-v0](https://gym.openai.com/evaluations/eval_N2624y3NSJWrOgoMSpOi4w)
-Note that atari game settings in gym is more difficult than the settings DeepMind papers, therefore the scores are not comparable.
+Note that the Atari game settings in gym are quite different from those used in the DeepMind papers, so the scores are not comparable.
--- a/examples/OpenAIGym/run-atari.py
+++ b/examples/OpenAIGym/run-atari.py
@@ -23,7 +23,7 @@ NUM_ACTIONS = None
 ENV_NAME = None
 def get_player(dumpdir=None):
-    pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
+    pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
    pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))
    global NUM_ACTIONS
@@ -32,10 +32,6 @@ def get_player(dumpdir=None):
    pl = HistoryFramePlayer(pl, FRAME_HISTORY)
    return pl
-class MySimulatorWorker(SimulatorProcess):
-    def _build_player(self):
-        return get_player(train=True)
 class Model(ModelDesc):
    def _get_input_vars(self):
        assert NUM_ACTIONS is not None
@@ -80,7 +76,9 @@ def run_submission(cfg):
    dirname = 'gym-submit'
    player = get_player(dumpdir=dirname)
    predfunc = get_predict_func(cfg)
-    for _ in range(100):
+    for k in range(10):
+        if k != 0:
+            player.restart_episode()
        score = play_one_episode(player, predfunc)
        print("Score:", score)

--- a/tensorpack/RL/gymenv.py
+++ b/tensorpack/RL/gymenv.py
@@ -25,7 +25,7 @@ class GymEnv(RLEnvironment):
    """
    An OpenAI/gym wrapper. Will auto restart.
    """
-    def __init__(self, name, dumpdir=None, viz=False):
+    def __init__(self, name, dumpdir=None, viz=False, auto_restart=True):
        with _ALE_LOCK:
            self.gymenv = gym.make(name)
        if dumpdir:
@@ -35,6 +35,7 @@ class GymEnv(RLEnvironment):
        self.reset_stat()
        self.rwd_counter = StatCounter()
        self.restart_episode()
+        self.auto_restart = auto_restart
        self.viz = viz
    def restart_episode(self):
@@ -55,7 +56,8 @@ class GymEnv(RLEnvironment):
        self.rwd_counter.feed(r)
        if isOver:
            self.finish_episode()
-            self.restart_episode()
+            if self.auto_restart:
+                self.restart_episode()
        return r, isOver
    def get_action_space(self):