bug fix in summary. fix TF compatibility break. update PTB readme

d52d68eb · Yuxin Wu · 08140e8c · d52d68eb · d52d68eb · d52d68eb
Commit d52d68eb authored Jan 12, 2017 by Yuxin Wu
6 changed files
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ The components are designed to be independent. You can use Model or DataFlow in
 + other requirements:
 ```
 pip install --user -r requirements.txt
-pip install --user -r opt-requirements.txt # (some optional dependencies, you can install later if needed)
+pip install --user -r opt-requirements.txt # (some optional dependencies required by certain submodules, you can install later if needed)
 ```
 + Enable `import tensorpack`:
 ```

--- a/examples/DoReFa-Net/alexnet-dorefa.py
+++ b/examples/DoReFa-Net/alexnet-dorefa.py
@@ -35,7 +35,8 @@ Accuracy:
    BATCH_SIZE * NUM_GPU. With a different number of GPUs in use, things might
    be a bit different, especially for learning rate.
-    With (W,A,G)=(32,32,32), 43% error.
+    With (W,A,G)=(32,32,32) -- full precision baseline, 43% error.
+    With (W,A,G)=(1,32,32) -- BWN, 46% error.
    With (W,A,G)=(1,2,6), 51% error.
    With (W,A,G)=(1,2,4), 63% error.

--- a/examples/PennTreebank/READER.md
+++ b/examples/PennTreebank/READER.md
@@ -2,7 +2,18 @@
 # LSTM language modeling on Penn Treebank dataset
 This example is mainly to demonstrate:
 1. How to train an RNN with persistent state between iterations.
 2. How to use a TF reader pipeline instead of a DataFlow for training & inference.
-More information to come.
+It trains a language model on the PTB dataset, basically an equivalent of the PTB example
+in [tensorflow/models](https://github.com/tensorflow/models/tree/master/tutorials/rnn/ptb).
+It has the same performance & speed as the original example as well.
+Note that the data pipeline is completely copied from the tensorflow example.
+To Train:
+```
+./PTB-LSTM.py
+```
--- a/tensorpack/models/batch_norm.py
+++ b/tensorpack/models/batch_norm.py
@@ -84,7 +84,7 @@ def BatchNormV1(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
    if use_local_stat:
        batch = tf.cast(tf.shape(x)[0], tf.float32)
-        mul = tf.select(tf.equal(batch, 1.0), 1.0, batch / (batch - 1))
+        mul = tf.where(tf.equal(batch, 1.0), 1.0, batch / (batch - 1))
        batch_var = batch_var * mul  # use unbiased variance estimator in training
        with tf.control_dependencies([ema_apply_op] if ctx.is_training else []):

--- a/tensorpack/tfutils/summary.py
+++ b/tensorpack/tfutils/summary.py
@@ -62,7 +62,7 @@ def add_param_summary(*summary_lists):
    ctx = get_current_tower_context()
    if ctx is not None and not ctx.is_main_training_tower:
        return
-    if len(summary_lists) == 0 and isinstance(summary_lists[0], list):
+    if len(summary_lists) == 1 and isinstance(summary_lists[0], list):
        logger.warn("[Deprecated] Use positional args to call add_param_summary() instead of a list.")
        summary_lists = summary_lists[0]

--- a/tensorpack/tfutils/symbolic_functions.py
+++ b/tensorpack/tfutils/symbolic_functions.py
@@ -121,7 +121,7 @@ def huber_loss(x, delta=1, name='huber_loss'):
    """
    sqrcost = tf.square(x)
    abscost = tf.abs(x)
-    return tf.select(abscost < delta,
+    return tf.where(abscost < delta,
                    sqrcost * 0.5,
                    abscost * delta - 0.5 * delta ** 2,
                    name=name)