Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
9b318943
Commit
9b318943
authored
May 23, 2019
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
grow lmdb map_size (fix #1209)
parent
413059b1
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
4 deletions
+27
-4
examples/keras/mnist-keras.py
examples/keras/mnist-keras.py
+2
-1
tensorpack/dataflow/serialize.py
tensorpack/dataflow/serialize.py
+25
-3
No files found.
examples/keras/mnist-keras.py
View file @
9b318943
...
...
@@ -20,7 +20,8 @@ KL = keras.layers
This is an mnist example demonstrating how to use Keras symbolic function inside tensorpack.
This way you can define models in Keras-style, and benefit from the more efficient trainers in tensorpack.
Note: this example does not work for replicated-style data-parallel trainers.
Note: this example does not work for replicated-style data-parallel trainers, so may be less efficient
for some models.
"""
IMAGE_SIZE
=
28
...
...
tensorpack/dataflow/serialize.py
View file @
9b318943
...
...
@@ -3,6 +3,7 @@
import
numpy
as
np
import
os
import
platform
from
collections
import
defaultdict
from
..utils
import
logger
...
...
@@ -47,10 +48,31 @@ class LMDBSerializer():
assert
not
os
.
path
.
isfile
(
os
.
path
.
join
(
path
,
'data.mdb'
)),
"LMDB file exists!"
else
:
assert
not
os
.
path
.
isfile
(
path
),
"LMDB file {} exists!"
.
format
(
path
)
# It's OK to use super large map_size on Linux, but not on other platforms
# See: https://github.com/NVIDIA/DIGITS/issues/206
map_size
=
1099511627776
*
2
if
platform
.
system
()
==
'Linux'
else
128
*
10
**
6
db
=
lmdb
.
open
(
path
,
subdir
=
isdir
,
map_size
=
1099511627776
*
2
,
readonly
=
False
,
map_size
=
map_size
,
readonly
=
False
,
meminit
=
False
,
map_async
=
True
)
# need sync() at the end
size
=
_reset_df_and_get_size
(
df
)
# put data into lmdb, and doubling the size if full.
# Ref: https://github.com/NVIDIA/DIGITS/pull/209/files
def put_or_grow(txn, key, value):
    """
    Write ``key -> value`` through ``txn``, doubling the LMDB map size
    each time the write fails because the map is full.

    Args:
        txn: write transaction to try the put on first.
        key: passed unchanged to ``txn.put``.
        value: passed unchanged to ``txn.put``.

    Returns:
        The transaction that accepted the put. This is ``txn`` itself when
        the first attempt succeeds; otherwise it is a new write transaction
        opened on ``db`` after the map was enlarged. Callers must keep using
        the returned transaction.
    """
    while True:
        try:
            txn.put(key, value)
        except lmdb.MapFullError:
            # Fall through to the grow-and-retry path below.
            pass
        else:
            return txn

        # The map is full: drop the current transaction before resizing.
        # NOTE(review): aborting presumably discards any puts made on this
        # transaction since its last commit -- confirm callers tolerate that.
        txn.abort()
        doubled_size = db.info()['map_size'] * 2
        logger.info("Doubling LMDB map_size to {:.2f}GB".format(doubled_size / 10**9))
        db.set_mapsize(doubled_size)

        # Retry the same put on a fresh write transaction.
        txn = db.begin(write=True)
with
get_tqdm
(
total
=
size
)
as
pbar
:
idx
=
-
1
...
...
@@ -58,7 +80,7 @@ class LMDBSerializer():
# although it has a context manager interface
txn
=
db
.
begin
(
write
=
True
)
for
idx
,
dp
in
enumerate
(
df
):
txn
.
put
(
u'{:08}'
.
format
(
idx
)
.
encode
(
'ascii'
),
dumps
(
dp
))
txn
=
put_or_grow
(
txn
,
u'{:08}'
.
format
(
idx
)
.
encode
(
'ascii'
),
dumps
(
dp
))
pbar
.
update
()
if
(
idx
+
1
)
%
write_frequency
==
0
:
txn
.
commit
()
...
...
@@ -67,7 +89,7 @@ class LMDBSerializer():
keys
=
[
u'{:08}'
.
format
(
k
)
.
encode
(
'ascii'
)
for
k
in
range
(
idx
+
1
)]
with
db
.
begin
(
write
=
True
)
as
txn
:
txn
.
put
(
b
'__keys__'
,
dumps
(
keys
))
txn
=
put_or_grow
(
txn
,
b
'__keys__'
,
dumps
(
keys
))
logger
.
info
(
"Flushing database ..."
)
db
.
sync
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment