Commit a2b26f2b authored by Yuxin Wu

Still use msgpack by default: speed varies between situations, and more investigation is needed.

parent d8d3ed1a
...@@ -8,8 +8,6 @@ so you won't need to look at here very often. ...@@ -8,8 +8,6 @@ so you won't need to look at here very often.
Here is a list of things that were changed, starting from an early version. Here is a list of things that were changed, starting from an early version.
TensorFlow itself also changed APIs before 1.0 and those are not listed here. TensorFlow itself also changed APIs before 1.0 and those are not listed here.
+ [2018/01/09] Switched to pyarrow for faster serialization/deserialization on buffer-like objects.
Old serialized data may not be compatible with future versions.
+ [2017/10/21] + [2017/10/21]
tensorpack is gradually switching to a new Trainer API. tensorpack is gradually switching to a new Trainer API.
The old API will keep working for a while. See [issue](https://github.com/ppwwyyxx/tensorpack/issues/458) The old API will keep working for a while. See [issue](https://github.com/ppwwyyxx/tensorpack/issues/458)
......
...@@ -30,7 +30,7 @@ MOCK_MODULES = ['tabulate', 'h5py', ...@@ -30,7 +30,7 @@ MOCK_MODULES = ['tabulate', 'h5py',
'scipy', 'scipy.misc', 'scipy.io', 'scipy', 'scipy.misc', 'scipy.io',
'tornado', 'tornado.concurrent', 'tornado', 'tornado.concurrent',
'horovod', 'horovod.tensorflow', 'horovod', 'horovod.tensorflow',
'pyarrow', 'pyarrow', 'msgpack', 'msgpack_numpy',
'functools32'] 'functools32']
for mod_name in MOCK_MODULES: for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock(name=mod_name) sys.modules[mod_name] = mock.Mock(name=mod_name)
......
...@@ -3,7 +3,8 @@ six ...@@ -3,7 +3,8 @@ six
termcolor>=1.1 termcolor>=1.1
tabulate>=0.7.7 tabulate>=0.7.7
tqdm>4.11.1 tqdm>4.11.1
pyarrow msgpack
msgpack-numpy>=0.4.0
pyzmq>=16 pyzmq>=16
subprocess32; python_version < '3.0' subprocess32; python_version < '3.0'
functools32; python_version < '3.0' functools32; python_version < '3.0'
...@@ -3,7 +3,14 @@ ...@@ -3,7 +3,14 @@
# File: serialize.py # File: serialize.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com> # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import pyarrow as pa import msgpack
import msgpack_numpy
msgpack_numpy.patch()
try:
import pyarrow as pa
except ImportError:
pass
__all__ = ['loads', 'dumps'] __all__ = ['loads', 'dumps']
...@@ -12,6 +19,23 @@ __all__ = ['loads', 'dumps'] ...@@ -12,6 +19,23 @@ __all__ = ['loads', 'dumps']
def dumps(obj): def dumps(obj):
""" """
Serialize an object. Serialize an object.
Returns:
Implementation-dependent bytes-like object
"""
return msgpack.dumps(obj, use_bin_type=True)
def loads(buf):
"""
Args:
buf: the output of `dumps`.
"""
return msgpack.loads(buf, encoding='utf-8')
def dumps_pyarrow(obj):
"""
Serialize an object.
Returns: Returns:
Implementation-dependent bytes-like object Implementation-dependent bytes-like object
...@@ -19,15 +43,9 @@ def dumps(obj): ...@@ -19,15 +43,9 @@ def dumps(obj):
return pa.serialize(obj).to_buffer() return pa.serialize(obj).to_buffer()
def loads(buf): def loads_pyarrow(buf):
""" """
Args: Args:
buf: the output of `dumps`. buf: the output of `dumps`.
""" """
try: return pa.deserialize(buf)
return pa.deserialize(buf)
except pa.ArrowIOError:
# Handle data serialized by old version of tensorpack.
import msgpack
import msgpack_numpy as mn
return msgpack.unpackb(buf, object_hook=mn.decode, encoding='utf-8')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment