Commit 41122718 authored by Yuxin Wu

small doc changes

parent eb560e61
@@ -64,17 +64,14 @@ class Model(GANModelDesc):
.Conv2D('conv1.3', NF * 2)
.AvgPooling('pool1', 2)
# 32
.Conv2D('conv2.1', NF * 2)
.Conv2D('conv2.2', NF * 3)
.AvgPooling('pool2', 2)
# 16
.Conv2D('conv3.1', NF * 3)
.Conv2D('conv3.2', NF * 4)
.AvgPooling('pool3', 2)
# 8
.Conv2D('conv4.1', NF * 4)
.Conv2D('conv4.2', NF * 4)
@@ -91,7 +88,7 @@ class Model(GANModelDesc):
def summary_image(name, x):
x = (x + 1.0) * 128.0
x = tf.clip_by_value(x, 0, 255)
-tf.summary.image(name, x, max_outputs=30)
+tf.summary.image(name, tf.cast(x, tf.uint8), max_outputs=30)
with argscope([Conv2D, FullyConnected],
W_init=tf.truncated_normal_initializer(stddev=0.02)):
......
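Side note on the cast above: tf.summary.image rescales floating-point inputs before writing them, while uint8 tensors are logged with their exact pixel values, which is presumably why the cast was added. A minimal sketch of the fixed helper (the same code as the hunk, with explanatory comments):

    import tensorflow as tf

    def summary_image(name, x):
        # x is a generator output in [-1, 1]; map it to [0, 255]
        x = (x + 1.0) * 128.0
        x = tf.clip_by_value(x, 0, 255)
        # float images get re-normalized by tf.summary.image, so cast to uint8
        # to log exactly the pixel values computed above
        tf.summary.image(name, tf.cast(x, tf.uint8), max_outputs=30)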
@@ -140,7 +140,7 @@ class Model(GANModelDesc):
global_step = get_global_step_var()
rate = tf.train.piecewise_constant(global_step, [np.int64(10000)], [0.01, 0.5])
-rate = tf.identity(rate, name='rate') # mitigate a TF bug
+rate = tf.identity(rate, name='rate') # TF issue#8594
g_loss = tf.add_n([
((G_loss_A + G_loss_B) * 0.1 +
(fm_loss_A + fm_loss_B) * 0.9) * (1 - rate),
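For reference, a minimal sketch of the schedule used above, assuming a plain TF 1.x graph where tf.train.get_or_create_global_step() stands in for tensorpack's get_global_step_var():

    import numpy as np
    import tensorflow as tf

    global_step = tf.train.get_or_create_global_step()
    # rate is 0.01 while global_step < 10000, then 0.5 afterwards; the boundary
    # dtype must match global_step (int64 here), hence np.int64
    rate = tf.train.piecewise_constant(global_step, [np.int64(10000)], [0.01, 0.5])
    # naming the tensor through an extra identity, as in the hunk above, also
    # sidesteps the TF issue referenced in the comment
    rate = tf.identity(rate, name='rate')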
@@ -207,7 +207,7 @@ if __name__ == '__main__':
'--data', required=True,
help='the img_align_celeba directory. should also contain list_attr_celeba.txt')
parser.add_argument('--style-A', help='style of A', default='Male')
-parser.add_argument('--style-B', help='style of B')
+parser.add_argument('--style-B', help='style of B, default to "not A"')
parser.add_argument('--load', help='load model')
args = parser.parse_args()
......
@@ -70,6 +70,7 @@ class Model(DCGAN.Model):
self.d_loss = tf.reduce_mean(vecneg - vecpos, name='d_loss')
self.g_loss = tf.negative(tf.reduce_mean(vecneg), name='g_loss')
+# the gradient penalty loss
gradients = tf.gradients(vec_interp, [interp])[0]
gradients = tf.sqrt(tf.reduce_sum(tf.square(gradients), [1, 2, 3]))
gradients_rms = symbolic_functions.rms(gradients, 'gradient_rms')
......
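The hunk only shows the gradient-norm part of the penalty; below is a sketch of how the WGAN-GP term is typically completed from these tensors. The interp/vec_interp names follow the code above, while the 10x weight and the 'total_d_loss' name are assumptions, not taken from this diff:

    # interp: points interpolated between real and fake images;
    # vec_interp: discriminator output on those points
    gradients = tf.gradients(vec_interp, [interp])[0]
    gradients = tf.sqrt(tf.reduce_sum(tf.square(gradients), [1, 2, 3]))
    # WGAN-GP pushes the per-sample gradient norm towards 1
    gradient_penalty = tf.reduce_mean(tf.square(gradients - 1.0), name='gradient_penalty')
    self.d_loss = tf.add(self.d_loss, 10.0 * gradient_penalty, name='total_d_loss')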
@@ -153,7 +153,7 @@ class Model(GANModelDesc):
opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
# generator learns 5 times faster
return optimizer.apply_grad_processors(
-opt, [gradproc.ScaleGradient(('gen/.*', 5), log=True)])
+opt, [gradproc.ScaleGradient(('gen/.*', 5))])
def get_data():
......
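As a self-contained illustration of the pattern above, here is a sketch of wrapping the optimizer with the gradient processor. The build_optimizer name is hypothetical; the imports assume tensorpack's tfutils.optimizer and tfutils.gradproc modules:

    import tensorflow as tf
    from tensorpack.tfutils import gradproc, optimizer

    def build_optimizer(lr):
        opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
        # gradients of variables matching 'gen/.*' are multiplied by 5 before
        # being applied, so the generator effectively learns 5 times faster
        return optimizer.apply_grad_processors(
            opt, [gradproc.ScaleGradient(('gen/.*', 5))])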
@@ -23,7 +23,6 @@ import DCGAN
G.BATCH = 64
# a hacky way to change loss & optimizer of another script
class Model(DCGAN.Model):
# def generator(self, z):
# you can override generator to remove BatchNorm, it will still work in WGAN
@@ -65,6 +64,6 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
"""
-The original code uses a different schedule.
+The original code uses a different schedule, but this seems to work well.
"""
SeparateGANTrainer(config, d_period=3).train()
@@ -160,11 +160,12 @@ class ScaleGradient(MapGradient):
Scale certain gradient by a multiplier.
"""
-def __init__(self, multipliers, log=True):
+def __init__(self, multipliers, verbose=True, log=None):
"""
Args:
multipliers (tuple or list): tuple of (regex, float), or list of tuples.
-log (bool): whether to do logging or not
+verbose (bool): whether to print logs or not
+log: deprecated
Example:
Use double learning rate for all the bias (as in caffe):
@@ -176,8 +177,11 @@ class ScaleGradient(MapGradient):
if not isinstance(multipliers, list):
multipliers = [multipliers]
self.multipliers = multipliers
-assert log in [True, False], log
-self._log = log
+if log is not None:
+    logger.warn("'log' in ScaleGradient(..) is renamed to 'verbose'.")
+    verbose = log
+assert verbose in [True, False], verbose
+self._verbose = verbose
super(ScaleGradient, self).__init__(self._mapper)
def _mapper(self, grad, var):
@@ -188,7 +192,7 @@ class ScaleGradient(MapGradient):
regex = regex + '$'
if re.match(regex, varname):
-if self._log:
+if self._verbose:
logger.info("Apply lr multiplier {} for {}".format(val, varname))
if val != 0: # skip zero to speed up
return grad * val
......
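After this change, both spellings of the flag are accepted; a short usage sketch, with the bias regex borrowed from the docstring's caffe-style example:

    from tensorpack.tfutils import gradproc

    # new spelling: 'verbose' controls the per-variable log lines
    proc = gradproc.ScaleGradient(('.*/b', 2), verbose=False)

    # old spelling still works, but emits a deprecation warning and is
    # forwarded to 'verbose'
    proc_legacy = gradproc.ScaleGradient(('.*/b', 2), log=False)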