bug fix in DQN

776c0dbb · Yuxin Wu · 266ac578 · 776c0dbb · 776c0dbb · 776c0dbb
Commit 776c0dbb authored Aug 24, 2016 by Yuxin Wu
Showing 3 changed files with 6 additions and 4 deletions

examples/Atari2600/DQN.py examples/Atari2600/DQN.py +1 -1

examples/OpenAIGym/README.md examples/OpenAIGym/README.md +4 -3

examples/cifar-convnet.py examples/cifar-convnet.py +1 -0

No files found.
--- a/examples/Atari2600/DQN.py
+++ b/examples/Atari2600/DQN.py
@@ -115,7 +115,7 @@ class Model(ModelDesc):
        target = reward + (1.0 - tf.cast(isOver, tf.float32)) * GAMMA * tf.stop_gradient(best_v)
-        cost = symbf.clipped_l2_loss(target - pred_action_value)
+        cost = symbf.huber_loss(target - pred_action_value)
        summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                                   ('fc.*/W', ['histogram', 'rms']) ])   # monitor all W
        self.cost = tf.reduce_mean(cost, name='cost')

--- a/examples/OpenAIGym/README.md
+++ b/examples/OpenAIGym/README.md
@@ -14,9 +14,10 @@ Models are available for the following gym atari environments (click links for v
 + [Asterix-v0](https://gym.openai.com/evaluations/eval_mees2c58QfKm5GspCjRfCA)
 + [Asteroids-v0](https://gym.openai.com/evaluations/eval_8eHKsRL4RzuZEq9AOLZA)
 + [Atlantis-v0](https://gym.openai.com/evaluations/eval_Z1B3d7A1QCaQk1HpO1Rg)
-+ [BattleZone-v0](https://gym.openai.com/evaluations/eval_SoLit2bR1qmFoC0AsJF6Q)
 + [BankHeist-v0](https://gym.openai.com/evaluations/eval_hifoaxFTIuLlPd38BjnOw)
++ [BattleZone-v0](https://gym.openai.com/evaluations/eval_SoLit2bR1qmFoC0AsJF6Q)
 + [BeamRider-v0](https://gym.openai.com/evaluations/eval_KuOYumrjQjixwL0spG0iCA)
++ [Berzerk-v0](https://gym.openai.com/evaluations/eval_Yri0XQbwRy62NzWILdn5IA)
 + [Breakout-v0](https://gym.openai.com/evaluations/eval_L55gczPrQJamMGihq9tzA)
 + [Carnival-v0](https://gym.openai.com/evaluations/eval_xJSOlo2lSWaH1wHEOX5vw)
 + [ChopperCommand-v0](https://gym.openai.com/evaluations/eval_tYVKyh7wQieRIKgEvVaCuw)
@@ -31,9 +32,9 @@ Models are available for the following gym atari environments (click links for v
 + [Krull-v0](https://gym.openai.com/evaluations/eval_dfOS2WzhTh6sn1FuPS9HA)
 + [KungFuMaster-v0](https://gym.openai.com/evaluations/eval_vNWDShYTRC0MhfIybeUYg)
 + [MsPacman-v0](https://gym.openai.com/evaluations/eval_kpL9bSsS4GXsYb9HuEfew)
-+ [Pooyan-v0](https://gym.openai.com/evaluations/eval_UXFVI34MSAuNTtjZcK8N0A)
-+ [Pong-v0](https://gym.openai.com/evaluations/eval_8L7SV59nSW6GGbbP3N4G6w)
 + [Phoenix-v0](https://gym.openai.com/evaluations/eval_uzUruiB3RRKUMvJIxvEzYA)
++ [Pong-v0](https://gym.openai.com/evaluations/eval_8L7SV59nSW6GGbbP3N4G6w)
++ [Pooyan-v0](https://gym.openai.com/evaluations/eval_UXFVI34MSAuNTtjZcK8N0A)
 + [Qbert-v0](https://gym.openai.com/evaluations/eval_wekCJkrWQm9NrOUzltXg)
 + [Riverraid-v0](https://gym.openai.com/evaluations/eval_OU4x3DkTfm4uaXy6CIaXg)
 + [RoadRunner-v0](https://gym.openai.com/evaluations/eval_wINKQTwxT9ipydHOXBhg)

--- a/examples/cifar-convnet.py
+++ b/examples/cifar-convnet.py
@@ -155,3 +155,4 @@ if __name__ == '__main__':
        if args.gpu:
            config.nr_tower = len(args.gpu.split(','))
        QueueInputTrainer(config).train()
+        #AsyncMultiGPUTrainer(config).train()