Commit 776c0dbb authored by Yuxin Wu

bug fix in DQN

parent 266ac578
...@@ -115,7 +115,7 @@ class Model(ModelDesc): ...@@ -115,7 +115,7 @@ class Model(ModelDesc):
target = reward + (1.0 - tf.cast(isOver, tf.float32)) * GAMMA * tf.stop_gradient(best_v) target = reward + (1.0 - tf.cast(isOver, tf.float32)) * GAMMA * tf.stop_gradient(best_v)
cost = symbf.clipped_l2_loss(target - pred_action_value) cost = symbf.huber_loss(target - pred_action_value)
summary.add_param_summary([('conv.*/W', ['histogram', 'rms']), summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
('fc.*/W', ['histogram', 'rms']) ]) # monitor all W ('fc.*/W', ['histogram', 'rms']) ]) # monitor all W
self.cost = tf.reduce_mean(cost, name='cost') self.cost = tf.reduce_mean(cost, name='cost')
......
...@@ -14,9 +14,10 @@ Models are available for the following gym atari environments (click links for v ...@@ -14,9 +14,10 @@ Models are available for the following gym atari environments (click links for v
+ [Asterix-v0](https://gym.openai.com/evaluations/eval_mees2c58QfKm5GspCjRfCA) + [Asterix-v0](https://gym.openai.com/evaluations/eval_mees2c58QfKm5GspCjRfCA)
+ [Asteroids-v0](https://gym.openai.com/evaluations/eval_8eHKsRL4RzuZEq9AOLZA) + [Asteroids-v0](https://gym.openai.com/evaluations/eval_8eHKsRL4RzuZEq9AOLZA)
+ [Atlantis-v0](https://gym.openai.com/evaluations/eval_Z1B3d7A1QCaQk1HpO1Rg) + [Atlantis-v0](https://gym.openai.com/evaluations/eval_Z1B3d7A1QCaQk1HpO1Rg)
+ [BattleZone-v0](https://gym.openai.com/evaluations/eval_SoLit2bR1qmFoC0AsJF6Q)
+ [BankHeist-v0](https://gym.openai.com/evaluations/eval_hifoaxFTIuLlPd38BjnOw) + [BankHeist-v0](https://gym.openai.com/evaluations/eval_hifoaxFTIuLlPd38BjnOw)
+ [BattleZone-v0](https://gym.openai.com/evaluations/eval_SoLit2bR1qmFoC0AsJF6Q)
+ [BeamRider-v0](https://gym.openai.com/evaluations/eval_KuOYumrjQjixwL0spG0iCA) + [BeamRider-v0](https://gym.openai.com/evaluations/eval_KuOYumrjQjixwL0spG0iCA)
+ [Berzerk-v0](https://gym.openai.com/evaluations/eval_Yri0XQbwRy62NzWILdn5IA)
+ [Breakout-v0](https://gym.openai.com/evaluations/eval_L55gczPrQJamMGihq9tzA) + [Breakout-v0](https://gym.openai.com/evaluations/eval_L55gczPrQJamMGihq9tzA)
+ [Carnival-v0](https://gym.openai.com/evaluations/eval_xJSOlo2lSWaH1wHEOX5vw) + [Carnival-v0](https://gym.openai.com/evaluations/eval_xJSOlo2lSWaH1wHEOX5vw)
+ [ChopperCommand-v0](https://gym.openai.com/evaluations/eval_tYVKyh7wQieRIKgEvVaCuw) + [ChopperCommand-v0](https://gym.openai.com/evaluations/eval_tYVKyh7wQieRIKgEvVaCuw)
...@@ -31,9 +32,9 @@ Models are available for the following gym atari environments (click links for v ...@@ -31,9 +32,9 @@ Models are available for the following gym atari environments (click links for v
+ [Krull-v0](https://gym.openai.com/evaluations/eval_dfOS2WzhTh6sn1FuPS9HA) + [Krull-v0](https://gym.openai.com/evaluations/eval_dfOS2WzhTh6sn1FuPS9HA)
+ [KungFuMaster-v0](https://gym.openai.com/evaluations/eval_vNWDShYTRC0MhfIybeUYg) + [KungFuMaster-v0](https://gym.openai.com/evaluations/eval_vNWDShYTRC0MhfIybeUYg)
+ [MsPacman-v0](https://gym.openai.com/evaluations/eval_kpL9bSsS4GXsYb9HuEfew) + [MsPacman-v0](https://gym.openai.com/evaluations/eval_kpL9bSsS4GXsYb9HuEfew)
+ [Pooyan-v0](https://gym.openai.com/evaluations/eval_UXFVI34MSAuNTtjZcK8N0A)
+ [Pong-v0](https://gym.openai.com/evaluations/eval_8L7SV59nSW6GGbbP3N4G6w)
+ [Phoenix-v0](https://gym.openai.com/evaluations/eval_uzUruiB3RRKUMvJIxvEzYA) + [Phoenix-v0](https://gym.openai.com/evaluations/eval_uzUruiB3RRKUMvJIxvEzYA)
+ [Pong-v0](https://gym.openai.com/evaluations/eval_8L7SV59nSW6GGbbP3N4G6w)
+ [Pooyan-v0](https://gym.openai.com/evaluations/eval_UXFVI34MSAuNTtjZcK8N0A)
+ [Qbert-v0](https://gym.openai.com/evaluations/eval_wekCJkrWQm9NrOUzltXg) + [Qbert-v0](https://gym.openai.com/evaluations/eval_wekCJkrWQm9NrOUzltXg)
+ [Riverraid-v0](https://gym.openai.com/evaluations/eval_OU4x3DkTfm4uaXy6CIaXg) + [Riverraid-v0](https://gym.openai.com/evaluations/eval_OU4x3DkTfm4uaXy6CIaXg)
+ [RoadRunner-v0](https://gym.openai.com/evaluations/eval_wINKQTwxT9ipydHOXBhg) + [RoadRunner-v0](https://gym.openai.com/evaluations/eval_wINKQTwxT9ipydHOXBhg)
......
...@@ -155,3 +155,4 @@ if __name__ == '__main__': ...@@ -155,3 +155,4 @@ if __name__ == '__main__':
if args.gpu: if args.gpu:
config.nr_tower = len(args.gpu.split(',')) config.nr_tower = len(args.gpu.split(','))
QueueInputTrainer(config).train() QueueInputTrainer(config).train()
#AsyncMultiGPUTrainer(config).train()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment