Commit 40c3ab6a authored by Yuxin Wu

AccumGrad supports sparse update (fix #435)

parent e0391e29
@@ -145,9 +145,6 @@ class AccumGradOptimizer(ProxyOptimizer):
     This is roughly the same as using a :math:`k` times larger batch size plus a
     :math:`k` times larger learning rate, but uses much less memory.
 
-    Note that this implementation may not support all models.
-    E.g., it currently doesn't support sparse gradient update.
-
     This optimizer can be used in any TensorFlow code (with or without tensorpack).
 
     Example:
@@ -183,9 +180,9 @@ class AccumGradOptimizer(ProxyOptimizer):
         grads_and_vars = FilterNoneGrad().process(grads_and_vars)
         vs = []
         for g, v in grads_and_vars:
-            assert isinstance(g, tf.Tensor) and isinstance(v, tf.Variable), \
-                "AccumGradOptimizer only works for dense update! " \
-                "Types of v and g are {} and {}".format(type(v), type(g))
+            assert isinstance(g, (tf.Tensor, tf.IndexedSlices)) and isinstance(v, tf.Variable), \
+                "AccumGradOptimizer does not work for the gradient of {}! " \
+                "Types of v and g are {} and {}".format(v.op.name, type(v), type(g))
             vs.append(v)
         with tf.control_dependencies(None):
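Why the assert now accepts tf.IndexedSlices: gradients of sparse lookup ops are not plain tensors. The minimal sketch below (not part of this commit; TF1-style graph mode assumed, matching the code above) shows that the gradient of tf.nn.embedding_lookup with respect to the embedding table arrives as a tf.IndexedSlices:

import tensorflow as tf

emb = tf.get_variable("emb", shape=[1000, 64])   # embedding table
ids = tf.placeholder(tf.int32, shape=[None])     # row indices to look up
loss = tf.reduce_sum(tf.nn.embedding_lookup(emb, ids))
grad = tf.gradients(loss, [emb])[0]
print(type(grad))  # tf.IndexedSlices, not tf.Tensor

Before this commit, the assert rejected such gradients outright; accepting IndexedSlices is what enables the sparse update of #435.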
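For reference, a hedged usage sketch of the optimizer touched by this commit, assuming the constructor signature AccumGradOptimizer(opt, niter) and the import path tensorpack.tfutils.optimizer:

import tensorflow as tf
from tensorpack.tfutils.optimizer import AccumGradOptimizer

x = tf.get_variable("x", shape=[10], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(x - 1.0))

# Accumulate gradients over 8 steps, then apply them once: roughly an
# 8x larger batch size plus an 8x larger learning rate, with less memory.
opt = AccumGradOptimizer(tf.train.GradientDescentOptimizer(0.1), 8)
train_op = opt.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(16):
        sess.run(train_op)  # x only changes on every 8th run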