Commit 45ebac95 authored by Yuxin Wu's avatar Yuxin Wu

add __init__.py (fix #705)

parent 05bf948f
...@@ -138,7 +138,7 @@ class SimpleMovingAverage(Callback): ...@@ -138,7 +138,7 @@ class SimpleMovingAverage(Callback):
window_size (int): size of the moving window window_size (int): size of the moving window
""" """
self._tensors_names = [get_op_tensor_name(x)[1] for x in tensors] self._tensor_names = [get_op_tensor_name(x)[1] for x in tensors]
self._display_names = [get_op_tensor_name(x)[0] for x in tensors] self._display_names = [get_op_tensor_name(x)[0] for x in tensors]
self._window = int(window_size) self._window = int(window_size)
self._queue = deque(maxlen=window_size) self._queue = deque(maxlen=window_size)
......
...@@ -134,7 +134,7 @@ class AccumGradOptimizer(ProxyOptimizer): ...@@ -134,7 +134,7 @@ class AccumGradOptimizer(ProxyOptimizer):
""" """
An optimizer which accumulates gradients across :math:`k` :meth:`minimize` calls, An optimizer which accumulates gradients across :math:`k` :meth:`minimize` calls,
and apply them together in every :math:`k`th :meth:`minimize` call. and apply them together in every :math:`k`th :meth:`minimize` call.
This is equivalent to using a :math:`k` times larger batch size plus a This is roughly the same as using a :math:`k` times larger batch size plus a
:math:`k` times larger learning rate, but uses much less memory. :math:`k` times larger learning rate, but uses much less memory.
Note that this implementation may not support all models. Note that this implementation may not support all models.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment