Commit 805c44d5 authored by Yuxin Wu

Use msgpack instead of pyarrow for "serialization to disk".

parent 708e07b0
......@@ -8,13 +8,15 @@ so you don't need to look at here very often.
Here is a list of things that were changed, starting from an early version.
TensorFlow itself also changes its API, and those changes are not listed here.
+ [2018/04/05] msgpack is replaced by pyarrow. If you want old behavior,
+ [2018/08/27] msgpack is used again for "serialization to disk", because data serialized by pyarrow
is not compatible between pyarrow versions. To use pyarrow instead, `export TENSORPACK_COMPATIBLE_SERIALIZE=pyarrow`.
+ [2018/04/05] msgpack is replaced by pyarrow in favor of its speed. If you want old behavior,
`export TENSORPACK_SERIALIZE=msgpack`.
+ [2018/03/20] `ModelDesc` starts to use simplified interfaces:
+ `_get_inputs()` renamed to `inputs()` and returns `tf.placeholder`s.
+ `build_graph(self, tensor1, tensor2)` returns the cost tensor directly.
+ `_get_optimizer()` renamed to `optimizer()`.
Old interface will still be available, but new ones are recommended.
Old interface will still be available for a while, but new ones are recommended.
+ [2018/03/12] `JSONWriter` uses a different file name, and will not automatically restore the epoch number.
`AutoResumeTrainConfig` was added to support resuming.
+ [2017/10/21]
......
......@@ -51,6 +51,15 @@ tensorpack.utils.serialize module
:undoc-members:
:show-inheritance:
tensorpack.utils.compatible_serialize module
--------------------------------------------
.. automodule:: tensorpack.utils.compatible_serialize
:members:
:undoc-members:
:show-inheritance:
tensorpack.utils.stats module
-----------------------------
......
......@@ -29,6 +29,8 @@ setup(
"tabulate>=0.7.7",
"tqdm>4.11.1",
"pyarrow>=0.9.0",
"msgpack>=0.5.2",
"msgpack-numpy>=0.4.0",
"pyzmq>=16",
"subprocess32; python_version < '3.0'",
"functools32; python_version < '3.0'",
......
......@@ -11,7 +11,7 @@ from ..utils import logger
from ..utils.utils import get_tqdm
from ..utils.timer import timed_operation
from ..utils.loadcaffe import get_caffe_pb
from ..utils.serialize import loads
from ..utils.compatible_serialize import loads
from ..utils.argtools import log_once
from ..utils.develop import log_deprecated
from .base import RNGDataFlow, DataFlow, DataFlowReentrantGuard
......
......@@ -7,7 +7,7 @@ from collections import defaultdict
from ..utils.utils import get_tqdm
from ..utils import logger
from ..utils.serialize import dumps, loads
from ..utils.compatible_serialize import dumps, loads
from .base import DataFlow
from .format import LMDBData, HDF5Data
......@@ -46,7 +46,7 @@ class LMDBSerializer():
if isdir:
assert not os.path.isfile(os.path.join(path, 'data.mdb')), "LMDB file exists!"
else:
assert not os.path.isfile(path), "LMDB file exists!"
assert not os.path.isfile(path), "LMDB file {} exists!".format(path)
db = lmdb.open(path, subdir=isdir,
map_size=1099511627776 * 2, readonly=False,
meminit=False, map_async=True) # need sync() at the end
......@@ -126,7 +126,7 @@ class TFRecordSerializer():
df (DataFlow): the DataFlow to serialize.
path (str): output tfrecord file.
"""
if os.environ.get('TENSORPACK_SERIALIZE', None) == 'msgpack':
if os.environ.get('TENSORPACK_COMPATIBLE_SERIALIZE', 'msgpack') == 'msgpack':
def _dumps(dp):
return dumps(dp)
else:
......
......@@ -2,6 +2,8 @@
# File: serialize.py
import os
import pyarrow as pa
from .develop import create_dummy_func
__all__ = ['loads', 'dumps']
......@@ -44,20 +46,11 @@ def loads_pyarrow(buf):
return pa.deserialize(buf)
try:
# fixed in pyarrow 0.9: https://github.com/apache/arrow/pull/1223#issuecomment-359895666
import pyarrow as pa
except ImportError:
pa = None
dumps_pyarrow = create_dummy_func('dumps_pyarrow', ['pyarrow']) # noqa
loads_pyarrow = create_dummy_func('loads_pyarrow', ['pyarrow']) # noqa
try:
import msgpack
import msgpack_numpy
msgpack_numpy.patch()
except ImportError:
assert pa is not None, "pyarrow is a dependency of tensorpack!"
loads_msgpack = create_dummy_func( # noqa
'loads_msgpack', ['msgpack', 'msgpack_numpy'])
dumps_msgpack = create_dummy_func( # noqa
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment