Commit 47de91da authored by Yuxin Wu

fix checkpoint dir issue under distributed training

parent 82a8953e
...
@@ -42,7 +42,10 @@ class ModelSaver(Callback):
         if checkpoint_dir is not None:
             if not tf.gfile.IsDirectory(checkpoint_dir):  # v2: tf.io.gfile.isdir
                 tf.gfile.MakeDirs(checkpoint_dir)  # v2: tf.io.gfile.makedirs
-        self.checkpoint_dir = os.path.normpath(checkpoint_dir)
+        # If None, allow it to be init, but fail later if used
+        # For example, if chief_only=True, it can still be safely initialized
+        # in non-chief workers which don't have logger dir
+        self.checkpoint_dir = os.path.normpath(checkpoint_dir) if checkpoint_dir is not None else checkpoint_dir
 
     def _setup_graph(self):
         assert self.checkpoint_dir is not None, \
...
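For context, a minimal sketch (not part of the commit) of the distributed-training scenario the new guard addresses. It assumes tensorpack's ModelSaver callback and logger.get_logger_dir(), and that checkpoint_dir falls back to the logger directory when not given; treat those details as assumptions rather than facts established by this diff.

# Sketch only: illustrates why ModelSaver.__init__ must tolerate checkpoint_dir=None.
# Assumes tensorpack's public API (ModelSaver, logger.get_logger_dir()); nothing
# here is taken from the commit except the behavior of the changed line.
from tensorpack.callbacks import ModelSaver
from tensorpack.utils import logger

# On a non-chief worker no logger directory is set up, so this returns None.
print(logger.get_logger_dir())  # None

# Before the fix: os.path.normpath(None) raised a TypeError in __init__, so the
# callback could not even be constructed on non-chief workers.
# After the fix: construction succeeds; the assert in _setup_graph() would still
# fire, but ModelSaver is chief_only, so that method never runs on this worker.
saver = ModelSaver()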