Commit 2238ca93 authored by Yuxin Wu

update docs

parent 77dc71e3
......@@ -13,7 +13,7 @@ It's Yet Another TF high-level API, with __speed__, __readability__ and __flexib
1. Focus on __training speed__.
+ Speed comes for free with tensorpack -- it uses TensorFlow in the __efficient way__ with no extra overhead.
On different CNNs, it runs training [1.2~5x faster](https://github.com/tensorpack/benchmarks/tree/master/other-wrappers) than the equivalent Keras code.
On common CNNs, it runs training [1.2~5x faster](https://github.com/tensorpack/benchmarks/tree/master/other-wrappers) than the equivalent Keras code.
+ Data-parallel multi-GPU/distributed training strategy is off-the-shelf to use.
It scales as well as Google's [official benchmark](https://www.tensorflow.org/performance/benchmarks).
......
......@@ -10,12 +10,12 @@ It's Yet Another TF wrapper, but different in:
- Focus on **training speed**.
- Speed comes for free with tensorpack -- it uses TensorFlow in the
**efficient way** with no extra overhead. On various CNNs, it runs
**efficient way** with no extra overhead. On common CNNs, it runs
`1.2~5x faster <https://github.com/tensorpack/benchmarks/tree/master/other-wrappers>`_
than the equivalent Keras code.
- Data-parallel multi-GPU/distributed training strategy is off-the-shelf to use.
It is as fast as Google's
It scales as well as Google's
`official benchmark <https://www.tensorflow.org/performance/benchmarks>`_.
You cannot beat its speed unless you're a TensorFlow expert.
......@@ -23,13 +23,13 @@ It's Yet Another TF wrapper, but different in:
- Focus on **large datasets**.
- It's unnecessary to read/preprocess data with a new language called TF.
Tensorpack helps you load large datasets (e.g. ImageNet) in **pure Python** with autoparallelization.
- You don't usually need `tf.data`. Symbolic programming often makes data processing harder.
Tensorpack helps you efficiently process large datasets (e.g. ImageNet) in **pure Python** with autoparallelization.
- It's not a model wrapper.
- There are already too many symbolic function wrappers in the world.
Tensorpack includes only a few common models, but you can use any other wrappers within tensorpack, including sonnet/Keras/slim/tflearn/tensorlayer/....
Tensorpack includes only a few common models, but you can use any symbolic function library inside tensorpack, including tf.layers/Keras/slim/tflearn/tensorlayer/...
See :doc:`tutorial/index` to learn more about these features:
......
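To make the pure-Python loading claim above concrete, here is a minimal sketch (not part of this commit) of a DataFlow pipeline; the random data, the batch size of 64, and the 16-process prefetch are made-up values, and `get_data` is the iteration hook tensorpack used at the time of this commit.

```python
import numpy as np
from tensorpack.dataflow import DataFlow, BatchData, PrefetchDataZMQ

class RandomImages(DataFlow):
    """A toy DataFlow: plain Python code yielding [image, label] datapoints."""
    def get_data(self):
        for _ in range(10000):
            yield [np.random.rand(224, 224, 3).astype('float32'),
                   np.random.randint(1000)]

df = BatchData(RandomImages(), 64)      # group datapoints into batches of 64
df = PrefetchDataZMQ(df, nr_proc=16)    # run the pipeline in 16 parallel processes
```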
......@@ -64,7 +64,9 @@ Note some __common problems__ when using these trainers:
instead of taking one input for all GPUs and splitting it.
So the total batch size would become ``(batch size of InputSource) * #GPU``.
Splitting a tensor for data-parallel training makes no sense at all, only to put unnecessary shape constraints on the data.
Splitting a tensor for data-parallel training makes no sense at all. First, why
waste time concatenating data into large batches only to split them again?
Second, it puts unnecessary shape constraints on the data.
By letting each GPU train on its own input tensors, they can train on inputs of different shapes simultaneously.
2. The tower function (your model code) will get called multiple times.
......
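A hedged sketch of the batch-size convention described in the hunk above (not code from this commit): `MyModel` and `df` are hypothetical placeholders for a ModelDesc and a DataFlow that yields per-GPU batches of 32, so 4 GPUs train with an effective total batch size of 32 * 4 = 128.

```python
from tensorpack import (TrainConfig, QueueInput,
                        SyncMultiGPUTrainerReplicated, launch_train_with_config)

# `MyModel` (a ModelDesc subclass) and `df` (a DataFlow yielding batches of 32)
# are hypothetical placeholders. Each GPU pulls its own batch from the
# InputSource, so nothing is concatenated and then split.
config = TrainConfig(
    model=MyModel(),
    data=QueueInput(df),
    steps_per_epoch=5000,
)
launch_train_with_config(config, SyncMultiGPUTrainerReplicated(4))  # total batch = 32 * 4
```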
......@@ -28,7 +28,7 @@ GAMMA = 0.99
MEMORY_SIZE = 1e6
# will consume at least 1e6 * 84 * 84 bytes == 6.6G memory.
INIT_MEMORY_SIZE = MEMORY_SIZE // 20
STEPS_PER_EPOCH = 10000 // UPDATE_FREQ * 10 # each epoch is 100k played frames
STEPS_PER_EPOCH = 100000 // UPDATE_FREQ # each epoch is 100k played frames
EVAL_EPISODE = 50
NUM_ACTIONS = None
......@@ -105,7 +105,7 @@ def get_config():
every_k_steps=10000 // UPDATE_FREQ), # update target network every 10k steps
expreplay,
ScheduledHyperParamSetter('learning_rate',
[(60, 4e-4), (100, 2e-4)]),
[(60, 4e-4), (100, 2e-4), (500, 5e-5)]),
ScheduledHyperParamSetter(
ObjAttrParam(expreplay, 'exploration'),
[(0, 1), (10, 0.1), (320, 0.01)], # 1->0.1 in the first million steps
......@@ -116,7 +116,7 @@ def get_config():
HumanHyperParamSetter('learning_rate'),
],
steps_per_epoch=STEPS_PER_EPOCH,
max_epoch=1000,
max_epoch=800,
)
......
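For reference, a quick check of the epoch arithmetic in the hunks above, assuming `UPDATE_FREQ = 4` as in this DQN example (the constant itself is not shown in the diff). The `ScheduledHyperParamSetter` schedules are `(epoch, value)` pairs, so the new `(500, 5e-5)` learning-rate entry still takes effect before the reduced `max_epoch` of 800.

```python
UPDATE_FREQ = 4                                   # assumed; not shown in this diff
STEPS_PER_EPOCH = 100000 // UPDATE_FREQ           # 25000 gradient updates per epoch
frames_per_epoch = STEPS_PER_EPOCH * UPDATE_FREQ  # one update every UPDATE_FREQ frames
print(frames_per_epoch)                           # 100000 -- "each epoch is 100k played frames"
```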
......@@ -100,7 +100,7 @@ def get_config(model, fake=False):
model=model,
data=data,
callbacks=callbacks,
steps_per_epoch=100 if args.fake else 1280000 // args.batch,
steps_per_epoch=100 if args.fake else 1281167 // args.batch,
max_epoch=105,
)
......
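The new constant 1281167 is the exact number of images in the ILSVRC12 training set, so one epoch is now one full pass over the data. As a quick check with a hypothetical total batch size of 256:

```python
NUM_TRAIN_IMAGES = 1281167          # size of the ImageNet (ILSVRC12) training set
batch = 256                         # hypothetical total batch size
print(NUM_TRAIN_IMAGES // batch)    # 5004 steps per epoch
```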
......@@ -38,12 +38,12 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
internal_update=False):
"""
Mostly equivalent to `tf.layers.batch_normalization`, but different in
the following:
the following due to historical reasons:
1. Accepts `data_format` when `axis` is None. For 2D input, this argument will be ignored.
2. Default value for `momentum` and `epsilon` is different.
3. Default value for `training` is automatically obtained from `TowerContext`.
4. Support the `internal_update` option.
4. Supports the `internal_update` option, which can be very useful in certain models.
Args:
internal_update (bool): if False, add EMA update ops to
......
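As a hedged illustration of the `internal_update` option documented above (not code from this commit), the layer might be used as below; the placeholder input and the explicit `training=True` are made up for the sketch, since in a real model `training` is inferred from the `TowerContext`.

```python
import tensorflow as tf
from tensorpack.models import BatchNorm

x = tf.placeholder(tf.float32, [None, 224, 224, 3], name='input')
# internal_update=True runs the EMA update as a control dependency of the
# output instead of relying on the UPDATE_OPS collection -- handy when the
# layer is not executed in every iteration (e.g. under tf.cond).
y = BatchNorm('bn', x, training=True, internal_update=True)
```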
......@@ -154,6 +154,39 @@ def execute_only_once():
return True
def _pick_tqdm_interval(file):
# Heuristics to pick an update interval for the progress bar that looks nice to users.
isatty = file.isatty()
# Jupyter notebook should be recognized as tty.
# Wait for https://github.com/ipython/ipykernel/issues/268
try:
from ipykernel import iostream
if isinstance(file, iostream.OutStream):
isatty = True
except ImportError:
pass
if isatty:
return 0.5
else:
# When run under mpirun/slurm, isatty is always False.
# Here we apply some hacky heuristics for slurm.
if 'SLURM_JOB_ID' in os.environ:
if int(os.environ.get('SLURM_JOB_NUM_NODES', 1)) > 1:
# multi-machine job, probably not interactive
return 60
else:
# possibly interactive, so let's be conservative
return 15
if 'OMPI_COMM_WORLD_SIZE' in os.environ:
if int(os.environ['OMPI_COMM_WORLD_SIZE']) > 1:
return 60
# If not a tty, don't refresh progress bar that often
return 180
def get_tqdm_kwargs(**kwargs):
"""
Return default arguments to be used with tqdm.
......@@ -174,33 +207,8 @@ def get_tqdm_kwargs(**kwargs):
# Use this env var to override the refresh interval setting
interval = float(os.environ['TENSORPACK_PROGRESS_REFRESH'])
except KeyError:
interval = _pick_tqdm_interval(kwargs.get('file', sys.stderr))
f = kwargs.get('file', sys.stderr)
isatty = f.isatty()
# Jupyter notebook should be recognized as tty.
# Wait for https://github.com/ipython/ipykernel/issues/268
try:
from ipykernel import iostream
if isinstance(f, iostream.OutStream):
isatty = True
except ImportError:
pass
if isatty:
interval = 0.5
else:
# When run under mpirun/slurm, isatty is always False.
# Here we apply some hacky heuristics for slurm.
if 'SLURM_JOB_ID' in os.environ:
if int(os.environ.get('SLURM_JOB_NUM_NODES', 1)) > 1:
# multi-machine job, probably not interactive
interval = 180
else:
# possibly interactive, so let's be conservative
interval = 15
# If not a tty, don't refresh progress bar that often
interval = 180
default['mininterval'] = interval
default.update(kwargs)
return default
......
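For context, a small hedged example of how the interval picked above typically reaches tqdm (not part of this commit); the 1000-step loop is made up.

```python
import tqdm
from tensorpack.utils.utils import get_tqdm_kwargs

# get_tqdm_kwargs() returns tqdm defaults such as `mininterval`, which now
# comes from _pick_tqdm_interval() unless TENSORPACK_PROGRESS_REFRESH is set.
for _ in tqdm.trange(1000, **get_tqdm_kwargs(desc='demo')):
    pass  # placeholder work
```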