Commit d8d3ed1a authored by Yuxin Wu

replace msgpack by pyarrow

parent a3272aad
......@@ -8,6 +8,8 @@ so you won't need to look at here very often.
Here is a list of things that were changed, starting from an early version.
TensorFlow itself also changed APIs before 1.0 and those are not listed here.
+ [2018/01/09] Switched to pyarrow for faster serialization/deserialization on buffer-like objects.
Old serialized data may not be compatible with future versions.
+ [2017/10/21]
tensorpack is gradually switching to a new Trainer API.
The old API will keep working for a while. See [issue](https://github.com/ppwwyyxx/tensorpack/issues/458)
......
......@@ -30,7 +30,7 @@ MOCK_MODULES = ['tabulate', 'h5py',
'scipy', 'scipy.misc', 'scipy.io',
'tornado', 'tornado.concurrent',
'horovod', 'horovod.tensorflow',
'msgpack', 'msgpack_numpy',
'pyarrow',
'functools32']
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock(name=mod_name)
......
......@@ -3,8 +3,7 @@ six
termcolor>=1.1
tabulate>=0.7.7
tqdm>4.11.1
msgpack
msgpack-numpy>=0.3.9
pyarrow
pyzmq>=16
subprocess32; python_version < '3.0'
functools32; python_version < '3.0'
......@@ -32,7 +32,7 @@ def send_dataflow_zmq(df, addr, hwm=50, format=None):
addr: a ZMQ socket endpoint.
hwm (int): ZMQ high-water mark (buffer size)
format (str): The serialization format.
Default format would use :mod:`tensorpack.utils.serialize` (i.e. msgpack).
Default format would use :mod:`tensorpack.utils.serialize`.
An alternate format is 'zmq_op', used by https://github.com/tensorpack/zmq_ops.
"""
assert format in [None, 'zmq_op']
......
......@@ -3,10 +3,7 @@
# File: serialize.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import msgpack
import msgpack_numpy
msgpack_numpy.patch()
import pyarrow as pa
__all__ = ['loads', 'dumps']
......@@ -17,14 +14,20 @@ def dumps(obj):
Serialize an object.
Returns:
str
Implementation-dependent bytes-like object
"""
return msgpack.dumps(obj, use_bin_type=True)
return pa.serialize(obj).to_buffer()
def loads(buf):
"""
Args:
buf (str): serialized object.
buf: the output of `dumps`.
"""
return msgpack.loads(buf, encoding='utf-8')
try:
return pa.deserialize(buf)
except pa.ArrowIOError:
# Handle data serialized by old version of tensorpack.
import msgpack
import msgpack_numpy as mn
return msgpack.unpackb(buf, object_hook=mn.decode, encoding='utf-8')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment