Commit f1b1ff92 authored by Yuxin Wu

bug fix & update speed numbers

parent 9e436219
......@@ -38,7 +38,7 @@ EXPLORATION_EPOCH_ANNEAL = 0.01
END_EXPLORATION = 0.1
MEMORY_SIZE = 1e6
INIT_MEMORY_SIZE = 50000
INIT_MEMORY_SIZE = 5e4
STEP_PER_EPOCH = 10000
EVAL_EPISODE = 50
......
......@@ -17,7 +17,7 @@ DQN was trained on 1 GPU and it typically took 2~3 days of training to reach a s
My Batch-A3C implementation only took <2 hours with 2 GPUs (one for training and one for simulation).
This is probably the fastest RL trainer you'd find.
The x-axis is the number of iterations not wall time. The iteration speed is 6.7it/s for B-A3C and 7.3it/s for D-DQN.
The x-axis is the number of iterations, not wall time; the iteration speed is about 7.8 it/s for both models.
A demo trained with Double-DQN on breakout is available at [youtube](https://youtu.be/o21mddZtE5Y).
......
......@@ -18,7 +18,7 @@ A small convnet model for Cifar10 or Cifar100 dataset.
Cifar10:
90% validation accuracy after 40k steps.
91% accuracy after 80k steps.
18.8 step/s on TitanX
19.3 step/s on Tesla M40
Not a good model for Cifar100, just for demonstration.
"""
......
......@@ -14,7 +14,9 @@ from tensorpack.tfutils.summary import *
"""
SVHN convnet.
About 3.0% validation error after 70 epoch. 2.5% after 130 epoch.
About 3.0% validation error after 70 epochs. 2.5% after 130 epochs.
Each epoch is set to 4721 iterations. The speed is about 44 it/s on a Tesla M30.
"""
class Model(ModelDesc):
......
......@@ -77,7 +77,7 @@ class ExpReplay(DataFlow, Callback):
with tqdm(total=self.init_memory_size) as pbar:
while len(self.mem) < self.init_memory_size:
#from copy import deepcopy # for debug
#from copy import deepcopy # quickly fill the memory for debug
#self.mem.append(deepcopy(self.mem[0]))
self._populate_exp()
pbar.update()
......
......@@ -182,7 +182,7 @@ class QueueInputTrainer(Trainer):
self.train_op = tf.group(
self.config.optimizer.apply_gradients(grads, get_global_step_var()),
summary_moving_average(), 'train_op')
summary_moving_average(), name='train_op')
self.main_loop()
......
......@@ -3,7 +3,7 @@
# File: gpu.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import os
from .utils import change_env
__all__ = ['change_gpu', 'get_nr_gpu', 'get_gpus']
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment