Commit adc9409f authored by Yuxin Wu's avatar Yuxin Wu

update serializer; update tests

parent 78762b38
...@@ -52,6 +52,14 @@ Note that the above methods only prevent variables being updated by SGD. ...@@ -52,6 +52,14 @@ Note that the above methods only prevent variables being updated by SGD.
Some variables may be updated by other means, Some variables may be updated by other means,
e.g., BatchNorm statistics are updated through the `UPDATE_OPS` collection and the [RunUpdateOps](../modules/callbacks.html#tensorpack.callbacks.RunUpdateOps) callback. e.g., BatchNorm statistics are updated through the `UPDATE_OPS` collection and the [RunUpdateOps](../modules/callbacks.html#tensorpack.callbacks.RunUpdateOps) callback.
## The model does not run on CPUs?
Some TensorFlow ops are not implemented on CPUs.
For example, TensorFlow does not support many ops in NCHW format on CPUs.
Note that an MKL-enabled build of TensorFlow supports more NCHW ops.
In general, you need to implement the model in a way your version of TensorFlow supports.
## My training seems slow. Why? ## My training seems slow. Why?
Check out the [Performance Tuning tutorial](performance-tuning.html) Check out the [Performance Tuning tutorial](performance-tuning.html)
...@@ -66,7 +66,7 @@ setup( ...@@ -66,7 +66,7 @@ setup(
], ],
tests_require=['flake8', 'scikit-image'], tests_require=['flake8', 'scikit-image'],
extras_require={ extras_require={
'all': ['pillow', 'scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'], 'all': ['scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
'all: "linux" in sys_platform': ['python-prctl'], 'all: "linux" in sys_platform': ['python-prctl'],
'all: python_version < "3.0"': ['tornado'], 'all: python_version < "3.0"': ['tornado'],
}, },
......
...@@ -43,7 +43,8 @@ for _, module_name, __ in iter_modules( ...@@ -43,7 +43,8 @@ for _, module_name, __ in iter_modules(
srcpath = os.path.join(_CURR_DIR, module_name + '.py') srcpath = os.path.join(_CURR_DIR, module_name + '.py')
if not os.path.isfile(srcpath): if not os.path.isfile(srcpath):
continue continue
if not module_name.startswith('_') and \ if "_test" not in module_name and \
not module_name.startswith('_') and \
module_name not in __SKIP: module_name not in __SKIP:
_global_import(module_name) _global_import(module_name)
......
...@@ -48,5 +48,5 @@ else: ...@@ -48,5 +48,5 @@ else:
srcpath = os.path.join(_CURR_DIR, module_name + '.py') srcpath = os.path.join(_CURR_DIR, module_name + '.py')
if not os.path.isfile(srcpath): if not os.path.isfile(srcpath):
continue continue
if not module_name.startswith('_'): if not module_name.startswith('_') and "_test" not in module_name:
global_import(module_name) global_import(module_name)
...@@ -44,5 +44,7 @@ for _, module_name, _ in iter_modules( ...@@ -44,5 +44,7 @@ for _, module_name, _ in iter_modules(
continue continue
if module_name.startswith('_'): if module_name.startswith('_'):
continue continue
if "_test" in module_name:
continue
if module_name not in _SKIP: if module_name not in _SKIP:
_global_import(module_name) _global_import(module_name)
...@@ -5,6 +5,9 @@ ...@@ -5,6 +5,9 @@
import logging import logging
import unittest import unittest
import tensorflow as tf import tensorflow as tf
import numpy as np
from .pool import FixedUnPooling
class TestModel(unittest.TestCase): class TestModel(unittest.TestCase):
...@@ -24,6 +27,47 @@ class TestModel(unittest.TestCase): ...@@ -24,6 +27,47 @@ class TestModel(unittest.TestCase):
return tf.Variable(args[0]) return tf.Variable(args[0])
class TestPool(TestModel):
    """Unit tests for the pooling layers."""

    def test_FixedUnPooling(self):
        """Verify the output shape and value placement of ``FixedUnPooling``.

        A random ``3 x 4 x 3`` float32 input is reshaped to a batch of one,
        unpooled by a factor of 2, and evaluated. The test then checks that:

        * the result has shape ``(1, scale * h, scale * w, 3)``;
        * in channel 0, every ``scale``-th row/column equals the input;
        * every other position (in all channels) is zero.
        """
        height, width = 3, 4
        scale = 2

        source = np.random.rand(height, width, 3).astype('float32')
        tensor = tf.reshape(self.make_variable(source), [1, height, width, 3])
        unpooled = self.run_variable(FixedUnPooling('unpool', tensor, scale))

        expected_shape = (1, scale * height, scale * width, 3)
        self.assertEqual(unpooled.shape, expected_shape)

        # The input values must sit on the top-left corner of each
        # scale x scale cell (only channel 0 is compared here).
        corners = unpooled[0, ::scale, ::scale, 0]
        self.assertTrue((corners == source[:, :, 0]).all())

        # Blank out those corner positions in every channel; whatever is
        # left over must already be zero.
        unpooled[0, ::scale, ::scale, :] = 0
        self.assertTrue((unpooled == 0).all())
# Below was originally for the BilinearUpsample layer used in the HED example
# def test_BilinearUpSample(self):
# h, w = 12, 12
# scale = 2
#
# mat = np.random.rand(h, w).astype('float32')
# inp = self.make_variable(mat)
# inp = tf.reshape(inp, [1, h, w, 1])
#
# output = BilinearUpSample(inp, scale)
# res = self.run_variable(output)[0, :, :, 0]
#
# from skimage.transform import rescale
# res2 = rescale(mat, scale, mode='edge')
#
# diff = np.abs(res2 - res)
#
# # if not diff.max() < 1e-4:
# # import IPython
# # IPython.embed(config=IPython.terminal.ipapp.load_default_config())
# self.assertTrue(diff.max() < 1e-4, diff.max())
def run_test_case(case): def run_test_case(case):
suite = unittest.TestLoader().loadTestsFromTestCase(case) suite = unittest.TestLoader().loadTestsFromTestCase(case)
unittest.TextTestRunner(verbosity=2).run(suite) unittest.TextTestRunner(verbosity=2).run(suite)
......
...@@ -5,7 +5,6 @@ import numpy as np ...@@ -5,7 +5,6 @@ import numpy as np
from ..compat import tfv1 as tf # this should be avoided first in model code from ..compat import tfv1 as tf # this should be avoided first in model code
from ..utils.argtools import get_data_format, shape2d from ..utils.argtools import get_data_format, shape2d
from ._test import TestModel
from .common import layer_register from .common import layer_register
from .shape_utils import StaticDynamicShape from .shape_utils import StaticDynamicShape
from .tflayer import convert_to_tflayer_args from .tflayer import convert_to_tflayer_args
...@@ -137,44 +136,3 @@ def FixedUnPooling(x, shape, unpool_mat=None, data_format='channels_last'): ...@@ -137,44 +136,3 @@ def FixedUnPooling(x, shape, unpool_mat=None, data_format='channels_last'):
ret.set_shape(tf.TensorShape(output_shape.get_static())) ret.set_shape(tf.TensorShape(output_shape.get_static()))
return ret return ret
class TestPool(TestModel):
    """Test case covering the pooling layers defined in this module."""

    def test_FixedUnPooling(self):
        """Check that ``FixedUnPooling`` upsamples by placing inputs on a grid.

        Builds a random float32 input of shape ``(3, 4, 3)``, feeds it through
        ``FixedUnPooling`` with ``scale = 2`` as a single-item batch, and
        asserts on the evaluated result:

        * shape is ``(1, scale * h, scale * w, 3)``;
        * channel 0 sampled with stride ``scale`` reproduces the input;
        * all remaining entries are zero.
        """
        rows, cols = 3, 4
        scale = 2

        original = np.random.rand(rows, cols, 3).astype('float32')
        batched = self.make_variable(original)
        batched = tf.reshape(batched, [1, rows, cols, 3])
        layer_out = FixedUnPooling('unpool', batched, scale)
        result = self.run_variable(layer_out)

        self.assertEqual(result.shape, (1, scale * rows, scale * cols, 3))

        # Strided sampling of channel 0 should give back the input values.
        sampled = result[0, ::scale, ::scale, 0]
        matches = (sampled == original[:, :, 0])
        self.assertTrue(matches.all())

        # Zero the strided positions across all channels; nothing non-zero
        # may remain afterwards.
        result[0, ::scale, ::scale, :] = 0
        leftover_is_zero = (result == 0)
        self.assertTrue(leftover_is_zero.all())
# Below was originally for the BilinearUpsample layer used in the HED example
# def test_BilinearUpSample(self):
# h, w = 12, 12
# scale = 2
#
# mat = np.random.rand(h, w).astype('float32')
# inp = self.make_variable(mat)
# inp = tf.reshape(inp, [1, h, w, 1])
#
# output = BilinearUpSample(inp, scale)
# res = self.run_variable(output)[0, :, :, 0]
#
# from skimage.transform import rescale
# res2 = rescale(mat, scale, mode='edge')
#
# diff = np.abs(res2 - res)
#
# # if not diff.max() < 1e-4:
# # import IPython
# # IPython.embed(config=IPython.terminal.ipapp.load_default_config())
# self.assertTrue(diff.max() < 1e-4, diff.max())
...@@ -2,14 +2,11 @@ ...@@ -2,14 +2,11 @@
# File: serialize.py # File: serialize.py
import os import os
import sys
import pickle
import msgpack import msgpack
import msgpack_numpy import msgpack_numpy
from . import logger
from .develop import create_dummy_func
msgpack_numpy.patch() msgpack_numpy.patch()
assert msgpack.version >= (0, 5, 2) assert msgpack.version >= (0, 5, 2)
...@@ -19,7 +16,10 @@ __all__ = ['loads', 'dumps'] ...@@ -19,7 +16,10 @@ __all__ = ['loads', 'dumps']
MAX_MSGPACK_LEN = 1000000000 MAX_MSGPACK_LEN = 1000000000
def dumps_msgpack(obj): class MsgpackSerializer(object):
@staticmethod
def dumps(obj):
""" """
Serialize an object. Serialize an object.
...@@ -28,8 +28,8 @@ def dumps_msgpack(obj): ...@@ -28,8 +28,8 @@ def dumps_msgpack(obj):
""" """
return msgpack.dumps(obj, use_bin_type=True) return msgpack.dumps(obj, use_bin_type=True)
@staticmethod
def loads_msgpack(buf): def loads(buf):
""" """
Args: Args:
buf: the output of `dumps`. buf: the output of `dumps`.
...@@ -43,7 +43,9 @@ def loads_msgpack(buf): ...@@ -43,7 +43,9 @@ def loads_msgpack(buf):
max_str_len=MAX_MSGPACK_LEN) max_str_len=MAX_MSGPACK_LEN)
def dumps_pyarrow(obj): class PyarrowSerializer(object):
@staticmethod
def dumps(obj):
""" """
Serialize an object. Serialize an object.
...@@ -51,51 +53,53 @@ def dumps_pyarrow(obj): ...@@ -51,51 +53,53 @@ def dumps_pyarrow(obj):
Implementation-dependent bytes-like object. Implementation-dependent bytes-like object.
May not be compatible across different versions of pyarrow. May not be compatible across different versions of pyarrow.
""" """
import pyarrow as pa
return pa.serialize(obj).to_buffer() return pa.serialize(obj).to_buffer()
@staticmethod
def dumps_bytes(obj):
"""
Returns:
bytes
"""
return PyarrowSerializer.dumps(obj).to_pybytes()
def loads_pyarrow(buf): @staticmethod
def loads(buf):
""" """
Args: Args:
buf: the output of `dumps`. buf: the output of `dumps` or `dumps_bytes`.
""" """
return pa.deserialize(buf)
# import pyarrow has a lot of side effect:
# https://github.com/apache/arrow/pull/2329
# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/TMqRaT-H2bI
# So we use msgpack as default.
if os.environ.get('TENSORPACK_SERIALIZE', 'msgpack') == 'pyarrow':
try:
import pyarrow as pa import pyarrow as pa
except ImportError: return pa.deserialize(buf)
loads_pyarrow = create_dummy_func('loads_pyarrow', ['pyarrow']) # noqa
dumps_pyarrow = create_dummy_func('dumps_pyarrow', ['pyarrow']) # noqa
if 'horovod' in sys.modules:
logger.warn("Horovod and pyarrow may have symbol conflicts. "
"Uninstall pyarrow and use msgpack instead.")
loads = loads_pyarrow
dumps = dumps_pyarrow
else:
loads = loads_msgpack
dumps = dumps_msgpack
class NonPicklableWrapper(object): class PickleSerializer(object):
@staticmethod
def dumps(obj):
"""
Returns:
bytes
""" """
TODO return pickle.dumps(obj, protocol=-1)
https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py @staticmethod
def loads(buf):
""" """
def __init__(self, obj): Args:
self._obj = obj bytes
"""
return pickle.loads(buf)
def __reduce__(self):
import dill
s = dill.dumps(self._obj)
return dill.loads, (s, )
def __call__(self, *args, **kwargs): _DEFAULT_S = os.environ.get('TENSORPACK_SERIALIZE', 'msgpack')
return self._obj(*args, **kwargs)
if _DEFAULT_S == "pyarrow":
dumps = PyarrowSerializer.dumps_bytes
loads = PyarrowSerializer.loads
elif _DEFAULT_S == "pickle":
dumps = PickleSerializer.dumps
loads = PickleSerializer.loads
else:
dumps = MsgpackSerializer.dumps
loads = MsgpackSerializer.loads
...@@ -14,11 +14,14 @@ python -c "from tensorflow.python.training.monitored_session import _HookedSessi ...@@ -14,11 +14,14 @@ python -c "from tensorflow.python.training.monitored_session import _HookedSessi
python -c "import tensorflow as tf; tf.Operation._add_control_input" python -c "import tensorflow as tf; tf.Operation._add_control_input"
# run tests # run tests
python -m tensorpack.callbacks.param_test python -m unittest tensorpack.callbacks.param_test
python -m tensorpack.tfutils.unit_tests python -m unittest tensorpack.tfutils.unit_tests
python -m unittest tensorpack.dataflow.imgaug._test python -m unittest tensorpack.dataflow.imgaug.imgaug_test
python -m unittest tensorpack.models.models_test
# use pyarrow after we organize the serializers. # use pyarrow after we organize the serializers.
# TENSORPACK_SERIALIZE=pyarrow python test_serializer.py # TENSORPACK_SERIALIZE=pyarrow python ...
TENSORPACK_SERIALIZE=msgpack python test_serializer.py python -m unittest tensorpack.dataflow.serialize_test
# e2e tests
python -m unittest discover -v python -m unittest discover -v
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment