Commit adc9409f authored by Yuxin Wu

update serializer; update tests

parent 78762b38
@@ -52,6 +52,14 @@ Note that the above methods only prevent variables being updated by SGD.
 Some variables may be updated by other means,
 e.g., BatchNorm statistics are updated through the `UPDATE_OPS` collection and the [RunUpdateOps](../modules/callbacks.html#tensorpack.callbacks.RunUpdateOps) callback.
 
+## The model does not run on CPUs?
+
+Some TensorFlow ops are not implemented on CPUs.
+For example, many ops in the NCHW data format have no CPU kernel,
+although an MKL-enabled build of TensorFlow supports more NCHW ops on CPUs.
+In general, you need to implement the model in a way that your version of TensorFlow supports.
+
 ## My training seems slow. Why?
 Check out the [Performance Tuning tutorial](performance-tuning.html)
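The usual workaround is to keep CPU graphs in NHWC, transposing around any NCHW-only op. A minimal sketch, not part of this commit; the helper name and the `on_gpu` flag are invented for illustration:

```python
import tensorflow as tf

def max_pool_cpu_compatible(x, ksize=2, stride=2, on_gpu=False):
    """x: an NCHW tensor. On CPU, transpose to NHWC, where kernels exist."""
    if on_gpu:
        return tf.nn.max_pool(x, [1, 1, ksize, ksize],
                              [1, 1, stride, stride], 'SAME',
                              data_format='NCHW')
    x = tf.transpose(x, [0, 2, 3, 1])     # NCHW -> NHWC
    x = tf.nn.max_pool(x, [1, ksize, ksize, 1],
                       [1, stride, stride, 1], 'SAME')
    return tf.transpose(x, [0, 3, 1, 2])  # NHWC -> NCHW
```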
@@ -66,7 +66,7 @@ setup(
     ],
     tests_require=['flake8', 'scikit-image'],
     extras_require={
-        'all': ['pillow', 'scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
+        'all': ['scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
         'all: "linux" in sys_platform': ['python-prctl'],
         'all: python_version < "3.0"': ['tornado'],
     },
@@ -43,7 +43,8 @@ for _, module_name, __ in iter_modules(
     srcpath = os.path.join(_CURR_DIR, module_name + '.py')
     if not os.path.isfile(srcpath):
         continue
-    if not module_name.startswith('_') and \
+    if "_test" not in module_name and \
+            not module_name.startswith('_') and \
             module_name not in __SKIP:
         _global_import(module_name)
@@ -48,5 +48,5 @@ else:
     srcpath = os.path.join(_CURR_DIR, module_name + '.py')
     if not os.path.isfile(srcpath):
         continue
-    if not module_name.startswith('_'):
+    if not module_name.startswith('_') and "_test" not in module_name:
         global_import(module_name)
@@ -44,5 +44,7 @@ for _, module_name, _ in iter_modules(
         continue
     if module_name.startswith('_'):
         continue
+    if "_test" in module_name:
+        continue
     if module_name not in _SKIP:
         _global_import(module_name)
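The three hunks above apply the same rule to different packages: modules whose name contains `_test` are excluded from auto-import. The `_global_import` helper itself does not appear in this diff; a minimal sketch of what such a helper typically does, assuming it re-exports a submodule's public names into the package namespace:

```python
import importlib

def _global_import(name):
    # Import a sibling module and lift its public names into the package,
    # so e.g. `tensorpack.models.Conv2D` works without importing the submodule.
    p = importlib.import_module('.' + name, __package__)
    lst = p.__all__ if '__all__' in dir(p) else dir(p)
    for k in lst:
        if not k.startswith('_'):
            globals()[k] = getattr(p, k)
```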
@@ -5,6 +5,9 @@
 import logging
 import unittest
 
 import tensorflow as tf
+import numpy as np
+
+from .pool import FixedUnPooling
 
 
 class TestModel(unittest.TestCase):
@@ -24,6 +27,47 @@ class TestModel(unittest.TestCase):
         return tf.Variable(args[0])
 
+
+class TestPool(TestModel):
+    def test_FixedUnPooling(self):
+        h, w = 3, 4
+        scale = 2
+        mat = np.random.rand(h, w, 3).astype('float32')
+        inp = self.make_variable(mat)
+        inp = tf.reshape(inp, [1, h, w, 3])
+        output = FixedUnPooling('unpool', inp, scale)
+        res = self.run_variable(output)
+        self.assertEqual(res.shape, (1, scale * h, scale * w, 3))
+
+        # mat is on corner
+        ele = res[0, ::scale, ::scale, 0]
+        self.assertTrue((ele == mat[:, :, 0]).all())
+
+        # the rest are zeros
+        res[0, ::scale, ::scale, :] = 0
+        self.assertTrue((res == 0).all())
+
+    # Below was originally for the BilinearUpSample layer used in the HED example
+    # def test_BilinearUpSample(self):
+    #     h, w = 12, 12
+    #     scale = 2
+    #
+    #     mat = np.random.rand(h, w).astype('float32')
+    #     inp = self.make_variable(mat)
+    #     inp = tf.reshape(inp, [1, h, w, 1])
+    #
+    #     output = BilinearUpSample(inp, scale)
+    #     res = self.run_variable(output)[0, :, :, 0]
+    #
+    #     from skimage.transform import rescale
+    #     res2 = rescale(mat, scale, mode='edge')
+    #
+    #     diff = np.abs(res2 - res)
+    #
+    #     self.assertTrue(diff.max() < 1e-4, diff.max())
+
+
 def run_test_case(case):
     suite = unittest.TestLoader().loadTestsFromTestCase(case)
     unittest.TextTestRunner(verbosity=2).run(suite)
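For reference, the property this test pins down can be stated directly in numpy: with the default unpool mat, `FixedUnPooling` places each input value at the top-left corner of its `scale x scale` block and zero-fills the rest, i.e. a per-channel Kronecker product with a corner-one matrix. A sketch, not from the commit; `fixed_unpool_ref` is a made-up name:

```python
import numpy as np

def fixed_unpool_ref(mat, scale):
    """mat: HxWxC array -> (scale*H, scale*W, C), corner-placement unpooling."""
    corner = np.zeros((scale, scale), dtype=mat.dtype)
    corner[0, 0] = 1  # default unpool mat: 1 at the top-left corner
    return np.stack([np.kron(mat[:, :, c], corner)
                     for c in range(mat.shape[-1])], axis=-1)

mat = np.random.rand(3, 4, 3).astype('float32')
out = fixed_unpool_ref(mat, 2)
assert out.shape == (6, 8, 3)
assert (out[::2, ::2] == mat).all()   # inputs sit on block corners
out[::2, ::2] = 0
assert (out == 0).all()               # everything else is zero
```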
@@ -5,7 +5,6 @@ import numpy as np
 
 from ..compat import tfv1 as tf  # this should be avoided first in model code
 from ..utils.argtools import get_data_format, shape2d
-from ._test import TestModel
 from .common import layer_register
 from .shape_utils import StaticDynamicShape
 from .tflayer import convert_to_tflayer_args
@@ -137,44 +136,3 @@ def FixedUnPooling(x, shape, unpool_mat=None, data_format='channels_last'):
     ret.set_shape(tf.TensorShape(output_shape.get_static()))
     return ret
-
-
-class TestPool(TestModel):
-    def test_FixedUnPooling(self):
-        h, w = 3, 4
-        scale = 2
-        mat = np.random.rand(h, w, 3).astype('float32')
-        inp = self.make_variable(mat)
-        inp = tf.reshape(inp, [1, h, w, 3])
-        output = FixedUnPooling('unpool', inp, scale)
-        res = self.run_variable(output)
-        self.assertEqual(res.shape, (1, scale * h, scale * w, 3))
-
-        # mat is on corner
-        ele = res[0, ::scale, ::scale, 0]
-        self.assertTrue((ele == mat[:, :, 0]).all())
-
-        # the rest are zeros
-        res[0, ::scale, ::scale, :] = 0
-        self.assertTrue((res == 0).all())
-
-    # Below was originally for the BilinearUpSample layer used in the HED example
-    # def test_BilinearUpSample(self):
-    #     h, w = 12, 12
-    #     scale = 2
-    #
-    #     mat = np.random.rand(h, w).astype('float32')
-    #     inp = self.make_variable(mat)
-    #     inp = tf.reshape(inp, [1, h, w, 1])
-    #
-    #     output = BilinearUpSample(inp, scale)
-    #     res = self.run_variable(output)[0, :, :, 0]
-    #
-    #     from skimage.transform import rescale
-    #     res2 = rescale(mat, scale, mode='edge')
-    #
-    #     diff = np.abs(res2 - res)
-    #
-    #     self.assertTrue(diff.max() < 1e-4, diff.max())
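With the test moved out, the module now contains only the layer itself. A usage sketch based on the signature above (assumptions: `FixedUnPooling` is importable from `tensorpack.models`, `unpool_mat` accepts a 2D numpy array of size `shape`, and a TF1-style graph is being built):

```python
import numpy as np
import tensorflow as tf
from tensorpack.models import FixedUnPooling

x = tf.placeholder(tf.float32, [None, 3, 4, 16])   # NHWC (channels_last)
# A custom unpool mat that copies each value over its whole 2x2 block,
# instead of the default corner placement:
y = FixedUnPooling('unpool', x, 2, unpool_mat=np.ones((2, 2), dtype='float32'))
print(y.shape)   # (?, 6, 8, 16)
```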
@@ -2,14 +2,11 @@
 # File: serialize.py
 
 import os
-import sys
+import pickle
 
 import msgpack
 import msgpack_numpy
 
-from . import logger
-from .develop import create_dummy_func
-
 msgpack_numpy.patch()
 assert msgpack.version >= (0, 5, 2)
 
@@ -19,83 +16,90 @@ __all__ = ['loads', 'dumps']
 
 MAX_MSGPACK_LEN = 1000000000
 
-def dumps_msgpack(obj):
-    """
-    Serialize an object.
-
-    Returns:
-        Implementation-dependent bytes-like object.
-    """
-    return msgpack.dumps(obj, use_bin_type=True)
-
-
-def loads_msgpack(buf):
-    """
-    Args:
-        buf: the output of `dumps`.
-    """
-    # Since 0.6, the default max size was set to 1MB.
-    # We change it to approximately 1G.
-    return msgpack.loads(buf, raw=False,
-                         max_bin_len=MAX_MSGPACK_LEN,
-                         max_array_len=MAX_MSGPACK_LEN,
-                         max_map_len=MAX_MSGPACK_LEN,
-                         max_str_len=MAX_MSGPACK_LEN)
-
-
-def dumps_pyarrow(obj):
-    """
-    Serialize an object.
-
-    Returns:
-        Implementation-dependent bytes-like object.
-        May not be compatible across different versions of pyarrow.
-    """
-    return pa.serialize(obj).to_buffer()
-
-
-def loads_pyarrow(buf):
-    """
-    Args:
-        buf: the output of `dumps`.
-    """
-    return pa.deserialize(buf)
-
-
-# import pyarrow has a lot of side effect:
-# https://github.com/apache/arrow/pull/2329
-# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/TMqRaT-H2bI
-# So we use msgpack as default.
-if os.environ.get('TENSORPACK_SERIALIZE', 'msgpack') == 'pyarrow':
-    try:
-        import pyarrow as pa
-    except ImportError:
-        loads_pyarrow = create_dummy_func('loads_pyarrow', ['pyarrow'])  # noqa
-        dumps_pyarrow = create_dummy_func('dumps_pyarrow', ['pyarrow'])  # noqa
-
-    if 'horovod' in sys.modules:
-        logger.warn("Horovod and pyarrow may have symbol conflicts. "
-                    "Uninstall pyarrow and use msgpack instead.")
-    loads = loads_pyarrow
-    dumps = dumps_pyarrow
+class MsgpackSerializer(object):
+
+    @staticmethod
+    def dumps(obj):
+        """
+        Serialize an object.
+
+        Returns:
+            Implementation-dependent bytes-like object.
+        """
+        return msgpack.dumps(obj, use_bin_type=True)
+
+    @staticmethod
+    def loads(buf):
+        """
+        Args:
+            buf: the output of `dumps`.
+        """
+        # Since 0.6, the default max size was set to 1MB.
+        # We change it to approximately 1G.
+        return msgpack.loads(buf, raw=False,
+                             max_bin_len=MAX_MSGPACK_LEN,
+                             max_array_len=MAX_MSGPACK_LEN,
+                             max_map_len=MAX_MSGPACK_LEN,
+                             max_str_len=MAX_MSGPACK_LEN)
+
+
+class PyarrowSerializer(object):
+    @staticmethod
+    def dumps(obj):
+        """
+        Serialize an object.
+
+        Returns:
+            Implementation-dependent bytes-like object.
+            May not be compatible across different versions of pyarrow.
+        """
+        import pyarrow as pa
+        return pa.serialize(obj).to_buffer()
+
+    @staticmethod
+    def dumps_bytes(obj):
+        """
+        Returns:
+            bytes
+        """
+        return PyarrowSerializer.dumps(obj).to_pybytes()
+
+    @staticmethod
+    def loads(buf):
+        """
+        Args:
+            buf: the output of `dumps` or `dumps_bytes`.
+        """
+        import pyarrow as pa
+        return pa.deserialize(buf)
+
+
+class PickleSerializer(object):
+    @staticmethod
+    def dumps(obj):
+        """
+        Returns:
+            bytes
+        """
+        return pickle.dumps(obj, protocol=-1)
+
+    @staticmethod
+    def loads(buf):
+        """
+        Args:
+            buf: bytes, the output of `dumps`.
+        """
+        return pickle.loads(buf)
+
+
+_DEFAULT_S = os.environ.get('TENSORPACK_SERIALIZE', 'msgpack')
+
+if _DEFAULT_S == "pyarrow":
+    dumps = PyarrowSerializer.dumps_bytes
+    loads = PyarrowSerializer.loads
+elif _DEFAULT_S == "pickle":
+    dumps = PickleSerializer.dumps
+    loads = PickleSerializer.loads
 else:
-    loads = loads_msgpack
-    dumps = dumps_msgpack
+    dumps = MsgpackSerializer.dumps
+    loads = MsgpackSerializer.loads
+
+
+class NonPicklableWrapper(object):
+    """
+    TODO
+    https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py
+    """
+    def __init__(self, obj):
+        self._obj = obj
+
+    def __reduce__(self):
+        import dill
+        s = dill.dumps(self._obj)
+        return dill.loads, (s, )
+
+    def __call__(self, *args, **kwargs):
+        return self._obj(*args, **kwargs)
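A usage sketch for the reorganized serializers (not part of the commit; it assumes the module path `tensorpack.utils.serialize` and that `dill` is installed for the wrapper). Since the backend is chosen at import time, `TENSORPACK_SERIALIZE` must be set before tensorpack is imported:

```python
import os
os.environ['TENSORPACK_SERIALIZE'] = 'pickle'  # or 'msgpack' (default) / 'pyarrow'

import pickle
import numpy as np
from tensorpack.utils.serialize import loads, dumps, NonPicklableWrapper

obj = {'step': 3, 'img': np.zeros((2, 2), dtype='float32')}
buf = dumps(obj)                        # bytes; format depends on the backend
assert (loads(buf)['img'] == obj['img']).all()

# NonPicklableWrapper routes pickling through dill, so e.g. lambdas survive:
f = NonPicklableWrapper(lambda x: x + 1)
g = pickle.loads(pickle.dumps(f))       # __reduce__ returns (dill.loads, (s,))
assert g(1) == 2
```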
@@ -14,11 +14,14 @@ python -c "from tensorflow.python.training.monitored_session import _HookedSessi
 python -c "import tensorflow as tf; tf.Operation._add_control_input"
 
 # run tests
-python -m tensorpack.callbacks.param_test
-python -m tensorpack.tfutils.unit_tests
-python -m unittest tensorpack.dataflow.imgaug._test
+python -m unittest tensorpack.callbacks.param_test
+python -m unittest tensorpack.tfutils.unit_tests
+python -m unittest tensorpack.dataflow.imgaug.imgaug_test
+python -m unittest tensorpack.models.models_test
 
 # use pyarrow after we organize the serializers.
-# TENSORPACK_SERIALIZE=pyarrow python test_serializer.py
-TENSORPACK_SERIALIZE=msgpack python test_serializer.py
+# TENSORPACK_SERIALIZE=pyarrow python ...
+python -m unittest tensorpack.dataflow.serialize_test
 
+# e2e tests
 python -m unittest discover -v