Commit 68981b66 authored by Yuxin Wu

[dump lmdb] add write frequency option

parent 73c8dd32
...@@ -41,7 +41,7 @@ def dump_dataset_images(ds, dirname, max_count=None, index=0): ...@@ -41,7 +41,7 @@ def dump_dataset_images(ds, dirname, max_count=None, index=0):
cv2.imwrite(os.path.join(dirname, "{}.jpg".format(i)), img) cv2.imwrite(os.path.join(dirname, "{}.jpg".format(i)), img)
def dump_dataflow_to_lmdb(ds, lmdb_path): def dump_dataflow_to_lmdb(ds, lmdb_path, write_frequency=5000):
""" """
Dump a Dataflow to a lmdb database, where the keys are indices and values Dump a Dataflow to a lmdb database, where the keys are indices and values
are serialized datapoints. are serialized datapoints.
...@@ -51,6 +51,7 @@ def dump_dataflow_to_lmdb(ds, lmdb_path): ...@@ -51,6 +51,7 @@ def dump_dataflow_to_lmdb(ds, lmdb_path):
Args: Args:
ds (DataFlow): the DataFlow to dump. ds (DataFlow): the DataFlow to dump.
lmdb_path (str): output path. Either a directory or a mdb file. lmdb_path (str): output path. Either a directory or a mdb file.
write_frequency (int): the frequency to write back data to disk.
""" """
assert isinstance(ds, DataFlow), type(ds) assert isinstance(ds, DataFlow), type(ds)
isdir = os.path.isdir(lmdb_path) isdir = os.path.isdir(lmdb_path)
...@@ -73,7 +74,7 @@ def dump_dataflow_to_lmdb(ds, lmdb_path): ...@@ -73,7 +74,7 @@ def dump_dataflow_to_lmdb(ds, lmdb_path):
try: try:
while True: while True:
with db.begin(write=True) as txn: with db.begin(write=True) as txn:
for _ in range(1000): for _ in range(write_frequency):
idx += 1 idx += 1
dp = next(itr) dp = next(itr)
txn.put(u'{}'.format(idx).encode('ascii'), dumps(dp)) txn.put(u'{}'.format(idx).encode('ascii'), dumps(dp))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment