Commit 1870496f authored by Yuxin Wu

[DQN] update docs; make eval configurable

parent 5fc1e2f9
@@ -26,7 +26,6 @@ MEMORY_SIZE = 1e6
 # will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
 INIT_MEMORY_SIZE = MEMORY_SIZE // 20
 STEPS_PER_EPOCH = 100000 // UPDATE_FREQ  # each epoch is 100k state transitions
-EVAL_EPISODE = 50
 NUM_PARALLEL_PLAYERS = 3
 USE_GYM = False
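As a sanity check on the memory comment in this hunk: the replay buffer stores one 84x84 uint8 grayscale frame per transition, so the quoted "6.6G" follows directly. A minimal sketch of the arithmetic (illustrative, not part of the commit):

```python
# Each replay-buffer entry is an 84x84 grayscale frame stored as uint8,
# i.e. one byte per pixel.
MEMORY_SIZE = int(1e6)
frame_bytes = 84 * 84                      # 7056 bytes per frame
total_bytes = MEMORY_SIZE * frame_bytes    # 7,056,000,000 bytes
print(total_bytes / 1024 ** 3)             # ~6.57 GiB -- the "6.6G" in the comment
```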
@@ -135,7 +134,7 @@ def get_config(model):
             [(0, 1), (10, 0.1), (400, 0.01)],   # 1->0.1 in the first million steps
             interp='linear'),
         PeriodicTrigger(Evaluator(
-            EVAL_EPISODE, ['state'], ['Qvalue'], get_player),
+            args.num_eval, ['state'], ['Qvalue'], get_player),
             every_k_epochs=5 if 'pong' in args.env.lower() else 10),  # eval more frequently for easy games
     ],
     steps_per_epoch=STEPS_PER_EPOCH,
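For context on the schedule above: the (epoch, value) pairs are interpolated linearly, and since each epoch is 100k transitions, the 0→10 segment covers exactly the "first million steps" mentioned in the comment. A small, hypothetical re-implementation of the interpolation (not the callback's actual code):

```python
def epsilon_at(epoch, schedule=((0, 1.0), (10, 0.1), (400, 0.01))):
    """Piecewise-linear lookup mirroring the (epoch, value) schedule above."""
    for (e0, v0), (e1, v1) in zip(schedule, schedule[1:]):
        if e0 <= epoch <= e1:
            return v0 + (v1 - v0) * (epoch - e0) / (e1 - e0)
    return schedule[-1][1]   # exploration stays at the last value afterwards

assert epsilon_at(5) == 0.55   # halfway down the 1 -> 0.1 segment
```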
@@ -153,6 +152,7 @@ if __name__ == '__main__':
                         help='either an atari rom file (that ends with .bin) or a gym atari environment name')
     parser.add_argument('--algo', help='algorithm',
                         choices=['DQN', 'Double', 'Dueling'], default='Double')
+    parser.add_argument('--num-eval', default=50, type=int)
     args = parser.parse_args()
     if args.gpu:
         os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
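A quick, self-contained check of what the new flag does (hypothetical snippet, mirroring the argparse setup above):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--num-eval', default=50, type=int)

print(parser.parse_args([]).num_eval)                      # 50, the old EVAL_EPISODE default
print(parser.parse_args(['--num-eval', '100']).num_eval)   # 100, overridden per run
```

In practice an evaluation run can now be invoked as, e.g., `python DQN.py --env breakout.bin --task eval --num-eval 100` (the `--task` and checkpoint-loading flags come from parts of the script not shown in this diff).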
@@ -177,7 +177,7 @@ if __name__ == '__main__':
     if args.task == 'play':
         play_n_episodes(get_player(viz=0.01), pred, 100, render=True)
     elif args.task == 'eval':
-        eval_model_multithread(pred, EVAL_EPISODE, get_player)
+        eval_model_multithread(pred, args.num_eval, get_player)
     else:
         logger.set_logger_dir(
             os.path.join('train_log', 'DQN-{}'.format(
...
@@ -100,7 +100,7 @@ class Model(ModelDesc):
     def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
-        tf.summary.scalar("learning_rate", lr)
+        tf.summary.scalar("learning_rate-summary", lr)
         opt = tf.train.RMSPropOptimizer(lr, decay=0.95, momentum=0.95, epsilon=1e-2)
         return optimizer.apply_grad_processors(opt, [gradproc.SummaryGradient()])
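The rename to `"learning_rate-summary"` plausibly avoids the summary op sharing a name with the `learning_rate` variable in the graph. As for `apply_grad_processors`: it wraps the optimizer so that each processor (here `SummaryGradient`, which adds summaries for the gradients) can transform them before they are applied. A rough, hypothetical sketch of that idea in plain TF1 terms (not tensorpack's actual implementation):

```python
def apply_with_processors(opt, loss, processors):
    """Illustrative only: run gradient processors by hand before applying."""
    grads_and_vars = opt.compute_gradients(loss)
    for proc in processors:
        # Each processor maps a [(grad, var), ...] list to a new one,
        # e.g. attaching a summary for every gradient tensor.
        grads_and_vars = proc.process(grads_and_vars)
    return opt.apply_gradients(grads_and_vars)
```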
...
@@ -54,10 +54,11 @@ Claimed performance in the paper can be reproduced, on several games I've tested
 ![DQN](curve-breakout.png)

 | Environment | Avg Score | Download |
-|:--------------|:---------:|:-------------------------------------------------------------------------------------:|
+|:---------------|:---------:|:--------------------------------------------------------------------------------------:|
 | breakout.bin | 465 | [:arrow_down:](http://models.tensorpack.com/DeepQNetwork/DoubleDQN-breakout.bin.npz) |
 | seaquest.bin | 8686 | [:arrow_down:](http://models.tensorpack.com/DeepQNetwork/DoubleDQN-seaquest.bin.npz) |
 | ms_pacman.bin | 3323 | [:arrow_down:](http://models.tensorpack.com/DeepQNetwork/DoubleDQN-ms_pacman.bin.npz) |
+| beam_rider.bin | 15835 | [:arrow_down:](http://models.tensorpack.com/DeepQNetwork/DoubleDQN-beam_rider.bin.npz) |

 ## Speed
 On one GTX 1080Ti,
...