Commit adc9409f authored by Yuxin Wu

update serializer; update tests

parent 78762b38
@@ -52,6 +52,14 @@ Note that the above methods only prevent variables being updated by SGD.
Some variables may be updated by other means,
e.g., BatchNorm statistics are updated through the `UPDATE_OPS` collection and the [RunUpdateOps](../modules/callbacks.html#tensorpack.callbacks.RunUpdateOps) callback.
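
For context, here is a minimal plain-TensorFlow sketch (not tensorpack code) of what the `UPDATE_OPS` mechanism amounts to, which is why freezing variables alone does not stop these statistics from changing:

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32])
net = tf.layers.dense(x, 16)
net = tf.layers.batch_normalization(net, training=True)  # registers ops in UPDATE_OPS
loss = tf.reduce_mean(tf.square(tf.layers.dense(net, 1)))

# BatchNorm moving-average updates live in the UPDATE_OPS collection;
# tensorpack's RunUpdateOps callback is what runs them during training.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
```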
## My model does not run on CPUs?
Some TensorFlow ops are not implemented on CPUs.
For example, many ops do not support the NCHW data format when running on CPUs.
If you use an MKL-enabled build of TensorFlow, more NCHW ops are supported on CPUs.
In general, you need to implement the model in a way that your version of TensorFlow supports.
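
A hedged sketch of the usual workaround (the names below are illustrative, not tensorpack APIs): keep the data format configurable and fall back to NHWC when no GPU is available:

```python
import tensorflow as tf

def nchw_to_nhwc(x):
    # NHWC ("channels_last") has much wider CPU support than NCHW.
    return tf.transpose(x, [0, 2, 3, 1])

# A common pattern: pick the data format based on device availability.
data_format = 'NCHW' if tf.test.is_gpu_available() else 'NHWC'
```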
## My training seems slow. Why?
Check out the [Performance Tuning tutorial](performance-tuning.html).
@@ -66,7 +66,7 @@ setup(
],
tests_require=['flake8', 'scikit-image'],
extras_require={
'all': ['pillow', 'scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
'all': ['scipy', 'h5py', 'lmdb>=0.92', 'matplotlib', 'scikit-learn'],
'all: "linux" in sys_platform': ['python-prctl'],
'all: python_version < "3.0"': ['tornado'],
},
@@ -43,7 +43,8 @@ for _, module_name, __ in iter_modules(
srcpath = os.path.join(_CURR_DIR, module_name + '.py')
if not os.path.isfile(srcpath):
continue
if not module_name.startswith('_') and \
if "_test" not in module_name and \
not module_name.startswith('_') and \
module_name not in __SKIP:
_global_import(module_name)
@@ -48,5 +48,5 @@ else:
srcpath = os.path.join(_CURR_DIR, module_name + '.py')
if not os.path.isfile(srcpath):
continue
if not module_name.startswith('_'):
if not module_name.startswith('_') and "_test" not in module_name:
global_import(module_name)
@@ -44,5 +44,7 @@ for _, module_name, _ in iter_modules(
continue
if module_name.startswith('_'):
continue
if "_test" in module_name:
continue
if module_name not in _SKIP:
_global_import(module_name)
@@ -5,6 +5,9 @@
import logging
import unittest
import tensorflow as tf
import numpy as np
from .pool import FixedUnPooling
class TestModel(unittest.TestCase):
@@ -24,6 +27,47 @@ class TestModel(unittest.TestCase):
return tf.Variable(args[0])
class TestPool(TestModel):
def test_FixedUnPooling(self):
h, w = 3, 4
scale = 2
mat = np.random.rand(h, w, 3).astype('float32')
inp = self.make_variable(mat)
inp = tf.reshape(inp, [1, h, w, 3])
output = FixedUnPooling('unpool', inp, scale)
res = self.run_variable(output)
self.assertEqual(res.shape, (1, scale * h, scale * w, 3))
# mat is on corner
ele = res[0, ::scale, ::scale, 0]
self.assertTrue((ele == mat[:, :, 0]).all())
# the rest are zeros
res[0, ::scale, ::scale, :] = 0
self.assertTrue((res == 0).all())
# Below was originally for the BilinearUpsample layer used in the HED example
# def test_BilinearUpSample(self):
# h, w = 12, 12
# scale = 2
#
# mat = np.random.rand(h, w).astype('float32')
# inp = self.make_variable(mat)
# inp = tf.reshape(inp, [1, h, w, 1])
#
# output = BilinearUpSample(inp, scale)
# res = self.run_variable(output)[0, :, :, 0]
#
# from skimage.transform import rescale
# res2 = rescale(mat, scale, mode='edge')
#
# diff = np.abs(res2 - res)
#
# # if not diff.max() < 1e-4:
# # import IPython
# # IPython.embed(config=IPython.terminal.ipapp.load_default_config())
# self.assertTrue(diff.max() < 1e-4, diff.max())
def run_test_case(case):
suite = unittest.TestLoader().loadTestsFromTestCase(case)
unittest.TextTestRunner(verbosity=2).run(suite)
@@ -5,7 +5,6 @@ import numpy as np
from ..compat import tfv1 as tf # this should be avoided first in model code
from ..utils.argtools import get_data_format, shape2d
from ._test import TestModel
from .common import layer_register
from .shape_utils import StaticDynamicShape
from .tflayer import convert_to_tflayer_args
@@ -137,44 +136,3 @@ def FixedUnPooling(x, shape, unpool_mat=None, data_format='channels_last'):
ret.set_shape(tf.TensorShape(output_shape.get_static()))
return ret
class TestPool(TestModel):
def test_FixedUnPooling(self):
h, w = 3, 4
scale = 2
mat = np.random.rand(h, w, 3).astype('float32')
inp = self.make_variable(mat)
inp = tf.reshape(inp, [1, h, w, 3])
output = FixedUnPooling('unpool', inp, scale)
res = self.run_variable(output)
self.assertEqual(res.shape, (1, scale * h, scale * w, 3))
# mat is on corner
ele = res[0, ::scale, ::scale, 0]
self.assertTrue((ele == mat[:, :, 0]).all())
# the rest are zeros
res[0, ::scale, ::scale, :] = 0
self.assertTrue((res == 0).all())
# Below was originally for the BilinearUpsample layer used in the HED example
# def test_BilinearUpSample(self):
# h, w = 12, 12
# scale = 2
#
# mat = np.random.rand(h, w).astype('float32')
# inp = self.make_variable(mat)
# inp = tf.reshape(inp, [1, h, w, 1])
#
# output = BilinearUpSample(inp, scale)
# res = self.run_variable(output)[0, :, :, 0]
#
# from skimage.transform import rescale
# res2 = rescale(mat, scale, mode='edge')
#
# diff = np.abs(res2 - res)
#
# # if not diff.max() < 1e-4:
# # import IPython
# # IPython.embed(config=IPython.terminal.ipapp.load_default_config())
# self.assertTrue(diff.max() < 1e-4, diff.max())
@@ -2,14 +2,11 @@
# File: serialize.py
import os
import sys
import pickle
import msgpack
import msgpack_numpy
from . import logger
from .develop import create_dummy_func
msgpack_numpy.patch()
assert msgpack.version >= (0, 5, 2)
@@ -19,83 +16,90 @@ __all__ = ['loads', 'dumps']
MAX_MSGPACK_LEN = 1000000000
def dumps_msgpack(obj):
"""
Serialize an object.
Returns:
Implementation-dependent bytes-like object.
"""
return msgpack.dumps(obj, use_bin_type=True)
def loads_msgpack(buf):
"""
Args:
buf: the output of `dumps`.
"""
# Since msgpack 0.6, the default max size was set to 1 MB.
# We change it to approximately 1 GB.
return msgpack.loads(buf, raw=False,
max_bin_len=MAX_MSGPACK_LEN,
max_array_len=MAX_MSGPACK_LEN,
max_map_len=MAX_MSGPACK_LEN,
max_str_len=MAX_MSGPACK_LEN)
def dumps_pyarrow(obj):
"""
Serialize an object.
Returns:
Implementation-dependent bytes-like object.
May not be compatible across different versions of pyarrow.
"""
return pa.serialize(obj).to_buffer()
def loads_pyarrow(buf):
"""
Args:
buf: the output of `dumps`.
"""
return pa.deserialize(buf)
# import pyarrow has a lot of side effect:
# https://github.com/apache/arrow/pull/2329
# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/TMqRaT-H2bI
# So we use msgpack as default.
if os.environ.get('TENSORPACK_SERIALIZE', 'msgpack') == 'pyarrow':
try:
class MsgpackSerializer(object):
@staticmethod
def dumps(obj):
"""
Serialize an object.
Returns:
Implementation-dependent bytes-like object.
"""
return msgpack.dumps(obj, use_bin_type=True)
@staticmethod
def loads(buf):
"""
Args:
buf: the output of `dumps`.
"""
# Since msgpack 0.6, the default max size was set to 1 MB.
# We change it to approximately 1 GB.
return msgpack.loads(buf, raw=False,
max_bin_len=MAX_MSGPACK_LEN,
max_array_len=MAX_MSGPACK_LEN,
max_map_len=MAX_MSGPACK_LEN,
max_str_len=MAX_MSGPACK_LEN)
class PyarrowSerializer(object):
@staticmethod
def dumps(obj):
"""
Serialize an object.
Returns:
Implementation-dependent bytes-like object.
May not be compatible across different versions of pyarrow.
"""
import pyarrow as pa
except ImportError:
loads_pyarrow = create_dummy_func('loads_pyarrow', ['pyarrow']) # noqa
dumps_pyarrow = create_dummy_func('dumps_pyarrow', ['pyarrow']) # noqa
if 'horovod' in sys.modules:
logger.warn("Horovod and pyarrow may have symbol conflicts. "
"Uninstall pyarrow and use msgpack instead.")
loads = loads_pyarrow
dumps = dumps_pyarrow
return pa.serialize(obj).to_buffer()
@staticmethod
def dumps_bytes(obj):
"""
Returns:
bytes
"""
return PyarrowSerializer.dumps(obj).to_pybytes()
@staticmethod
def loads(buf):
"""
Args:
buf: the output of `dumps` or `dumps_bytes`.
"""
import pyarrow as pa
return pa.deserialize(buf)
class PickleSerializer(object):
@staticmethod
def dumps(obj):
"""
Returns:
bytes
"""
return pickle.dumps(obj, protocol=-1)
@staticmethod
def loads(buf):
"""
Args:
buf: the output of `dumps`.
"""
return pickle.loads(buf)
_DEFAULT_S = os.environ.get('TENSORPACK_SERIALIZE', 'msgpack')
if _DEFAULT_S == "pyarrow":
dumps = PyarrowSerializer.dumps_bytes
loads = PyarrowSerializer.loads
elif _DEFAULT_S == "pickle":
dumps = PickleSerializer.dumps
loads = PickleSerializer.loads
else:
loads = loads_msgpack
dumps = dumps_msgpack
class NonPicklableWrapper(object):
"""
Wraps a callable so it can be pickled even when the callable itself is not
picklable: ``__reduce__`` serializes the wrapped object with dill instead.
TODO
https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py
"""
def __init__(self, obj):
self._obj = obj
def __reduce__(self):
import dill
s = dill.dumps(self._obj)
return dill.loads, (s, )
def __call__(self, *args, **kwargs):
return self._obj(*args, **kwargs)
dumps = MsgpackSerializer.dumps
loads = MsgpackSerializer.loads
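
For reference, a hedged usage sketch of the serializer interface after this change (assuming the module path `tensorpack.utils.serialize`; the backend is picked once at import time from the `TENSORPACK_SERIALIZE` environment variable, defaulting to msgpack, with `pyarrow` and `pickle` as alternatives):

```python
import os
# Must be set before tensorpack is imported, since the backend is chosen at import time.
os.environ.setdefault("TENSORPACK_SERIALIZE", "msgpack")

from tensorpack.utils.serialize import dumps, loads

obj = {"step": 3, "loss": [0.5, 0.25]}
buf = dumps(obj)            # implementation-dependent bytes-like object
assert loads(buf) == obj    # round-trips back to the original object
```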
@@ -14,11 +14,14 @@ python -c "from tensorflow.python.training.monitored_session import _HookedSessi
python -c "import tensorflow as tf; tf.Operation._add_control_input"
# run tests
python -m tensorpack.callbacks.param_test
python -m tensorpack.tfutils.unit_tests
python -m unittest tensorpack.dataflow.imgaug._test
python -m unittest tensorpack.callbacks.param_test
python -m unittest tensorpack.tfutils.unit_tests
python -m unittest tensorpack.dataflow.imgaug.imgaug_test
python -m unittest tensorpack.models.models_test
# use pyarrow after we organize the serializers.
# TENSORPACK_SERIALIZE=pyarrow python test_serializer.py
TENSORPACK_SERIALIZE=msgpack python test_serializer.py
# TENSORPACK_SERIALIZE=pyarrow python ...
python -m unittest tensorpack.dataflow.serialize_test
# e2e tests
python -m unittest discover -v