Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
43f7ca75
Commit
43f7ca75
authored
Mar 04, 2018
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
EstimatedTimeLeft callback
parent
4f52bcfd
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
179 additions
and
135 deletions
+179
-135
.travis.yml
.travis.yml
+3
-3
docs/conf.py
docs/conf.py
+1
-0
docs/requirements.txt
docs/requirements.txt
+1
-1
docs/tutorial/callback.md
docs/tutorial/callback.md
+4
-2
examples/FasterRCNN/train.py
examples/FasterRCNN/train.py
+2
-2
examples/ResNet/imagenet-resnet.py
examples/ResNet/imagenet-resnet.py
+1
-0
tensorpack/callbacks/__init__.py
tensorpack/callbacks/__init__.py
+1
-1
tensorpack/callbacks/graph.py
tensorpack/callbacks/graph.py
+64
-2
tensorpack/callbacks/misc.py
tensorpack/callbacks/misc.py
+97
-0
tensorpack/callbacks/stats.py
tensorpack/callbacks/stats.py
+5
-124
No files found.
.travis.yml
View file @
43f7ca75
...
@@ -29,7 +29,7 @@ matrix:
...
@@ -29,7 +29,7 @@ matrix:
env
:
TF_VERSION=1.6.0 TF_TYPE=release
env
:
TF_VERSION=1.6.0 TF_TYPE=release
-
os
:
linux
-
os
:
linux
python
:
3.5
python
:
3.5
env
:
TF_VERSION=1.6.0 TF_TYPE=release
env
:
TF_VERSION=1.6.0 TF_TYPE=release
PYPI=true
-
os
:
linux
-
os
:
linux
python
:
2.7
python
:
2.7
env
:
TF_VERSION=1.head TF_TYPE=nightly
env
:
TF_VERSION=1.head TF_TYPE=nightly
...
@@ -90,7 +90,7 @@ deploy:
...
@@ -90,7 +90,7 @@ deploy:
branch
:
master
branch
:
master
repo
:
ppwwyyxx/tensorpack
repo
:
ppwwyyxx/tensorpack
python
:
"
3.5"
python
:
"
3.5"
condition
:
"
$
TF_TYPE
=
releas
e"
condition
:
"
$
PYPI
=
tru
e"
-
provider
:
pypi
-
provider
:
pypi
server
:
https://testpypi.python.org/pypi
server
:
https://testpypi.python.org/pypi
...
@@ -103,4 +103,4 @@ deploy:
...
@@ -103,4 +103,4 @@ deploy:
branch
:
test-travis
branch
:
test-travis
repo
:
ppwwyyxx/tensorpack
repo
:
ppwwyyxx/tensorpack
python
:
"
3.5"
python
:
"
3.5"
condition
:
"
$
TF_TYPE
=
releas
e"
condition
:
"
$
PYPI
=
tru
e"
docs/conf.py
View file @
43f7ca75
...
@@ -368,6 +368,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
...
@@ -368,6 +368,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
'GaussianDeform'
,
'GaussianDeform'
,
'dump_chkpt_vars'
,
'dump_chkpt_vars'
,
'DumpTensor'
,
'DumpTensor'
,
'DumpParamAsImage'
,
'StagingInputWrapper'
,
'StagingInputWrapper'
,
'set_tower_func'
,
'set_tower_func'
,
'TryResumeTraining'
,
'TryResumeTraining'
,
...
...
docs/requirements.txt
View file @
43f7ca75
...
@@ -5,4 +5,4 @@ Sphinx>=1.6
...
@@ -5,4 +5,4 @@ Sphinx>=1.6
recommonmark==0.4.0
recommonmark==0.4.0
sphinx_rtd_theme
sphinx_rtd_theme
mock
mock
tensorflow
tensorflow
==1.5.0
docs/tutorial/callback.md
View file @
43f7ca75
...
@@ -45,8 +45,10 @@ callbacks=[
...
@@ -45,8 +45,10 @@ callbacks=[
'val-error-top1'
),
'val-error-top1'
),
# record GPU utilizations during training
# record GPU utilizations during training
GPUUtilizationTracker
(),
GPUUtilizationTracker
(),
# Touch a file to pause the training and start a debug shell, to observe what's going on
# touch a file to pause the training and start a debug shell, to observe what's going on
InjectShell
(
shell
=
'ipython'
)
InjectShell
(
shell
=
'ipython'
),
# estimate time until completion
EstimatedTimeLeft
()
]
+
[
# these callbacks are enabled by default already, though you can customize them
]
+
[
# these callbacks are enabled by default already, though you can customize them
# maintain those moving average summaries defined in the model (e.g. training loss, training error)
# maintain those moving average summaries defined in the model (e.g. training loss, training error)
MovingAverageSummary
(),
MovingAverageSummary
(),
...
...
examples/FasterRCNN/train.py
View file @
43f7ca75
...
@@ -298,8 +298,7 @@ def predict(pred_func, input_file):
...
@@ -298,8 +298,7 @@ def predict(pred_func, input_file):
class
EvalCallback
(
Callback
):
class
EvalCallback
(
Callback
):
def
_setup_graph
(
self
):
def
_setup_graph
(
self
):
self
.
pred
=
self
.
trainer
.
get_predictor
(
self
.
pred
=
self
.
trainer
.
get_predictor
(
[
'image'
],
[
'image'
],
get_model_output_names
())
get_model_output_names
())
self
.
df
=
get_eval_dataflow
()
self
.
df
=
get_eval_dataflow
()
def
_before_train
(
self
):
def
_before_train
(
self
):
...
@@ -389,6 +388,7 @@ if __name__ == '__main__':
...
@@ -389,6 +388,7 @@ if __name__ == '__main__':
ScheduledHyperParamSetter
(
'learning_rate'
,
lr_schedule
),
ScheduledHyperParamSetter
(
'learning_rate'
,
lr_schedule
),
EvalCallback
(),
EvalCallback
(),
GPUUtilizationTracker
(),
GPUUtilizationTracker
(),
EstimatedTimeLeft
(),
],
],
steps_per_epoch
=
stepnum
,
steps_per_epoch
=
stepnum
,
max_epoch
=
config
.
LR_SCHEDULE
[
2
]
*
factor
//
stepnum
,
max_epoch
=
config
.
LR_SCHEDULE
[
2
]
*
factor
//
stepnum
,
...
...
examples/ResNet/imagenet-resnet.py
View file @
43f7ca75
...
@@ -77,6 +77,7 @@ def get_config(model, fake=False):
...
@@ -77,6 +77,7 @@ def get_config(model, fake=False):
BASE_LR
=
0.1
*
(
args
.
batch
/
256.0
)
BASE_LR
=
0.1
*
(
args
.
batch
/
256.0
)
callbacks
=
[
callbacks
=
[
ModelSaver
(),
ModelSaver
(),
EstimatedTimeLeft
(),
ScheduledHyperParamSetter
(
ScheduledHyperParamSetter
(
'learning_rate'
,
[(
30
,
BASE_LR
*
1e-1
),
(
60
,
BASE_LR
*
1e-2
),
'learning_rate'
,
[(
30
,
BASE_LR
*
1e-1
),
(
60
,
BASE_LR
*
1e-2
),
(
85
,
BASE_LR
*
1e-3
),
(
95
,
BASE_LR
*
1e-4
),
(
105
,
BASE_LR
*
1e-5
)]),
(
85
,
BASE_LR
*
1e-3
),
(
95
,
BASE_LR
*
1e-4
),
(
105
,
BASE_LR
*
1e-5
)]),
...
...
tensorpack/callbacks/__init__.py
View file @
43f7ca75
...
@@ -17,7 +17,7 @@ if STATICA_HACK:
...
@@ -17,7 +17,7 @@ if STATICA_HACK:
from
.param
import
*
from
.param
import
*
from
.prof
import
*
from
.prof
import
*
from
.saver
import
*
from
.saver
import
*
from
.
stats
import
*
from
.
misc
import
*
from
.steps
import
*
from
.steps
import
*
from
.summary
import
*
from
.summary
import
*
from
.trigger
import
*
from
.trigger
import
*
...
...
tensorpack/callbacks/graph.py
View file @
43f7ca75
...
@@ -8,12 +8,14 @@
...
@@ -8,12 +8,14 @@
import
tensorflow
as
tf
import
tensorflow
as
tf
import
os
import
os
import
numpy
as
np
import
numpy
as
np
from
six.moves
import
zip
from
..utils
import
logger
from
..utils
import
logger
from
.base
import
Callback
from
.base
import
Callback
from
six.moves
import
zip
from
..tfutils.common
import
get_op_tensor_name
__all__
=
[
'RunOp'
,
'RunUpdateOps'
,
'ProcessTensors'
,
'DumpTensors'
,
'DumpTensor'
]
__all__
=
[
'RunOp'
,
'RunUpdateOps'
,
'ProcessTensors'
,
'DumpTensors'
,
'DumpTensor'
,
'DumpTensorAsImage'
,
'DumpParamAsImage'
]
class
RunOp
(
Callback
):
class
RunOp
(
Callback
):
...
@@ -158,4 +160,64 @@ class DumpTensors(ProcessTensors):
...
@@ -158,4 +160,64 @@ class DumpTensors(ProcessTensors):
super
(
DumpTensors
,
self
)
.
__init__
(
names
,
fn
)
super
(
DumpTensors
,
self
)
.
__init__
(
names
,
fn
)
class DumpTensorAsImage(Callback):
    """
    Dump a tensor to image(s) to ``logger.get_logger_dir()`` once triggered.

    Note that it requires the tensor is directly evaluable, i.e. either inputs
    are not its dependency (e.g. the weights of the model), or the inputs are
    feedfree (in which case this callback will take an extra datapoint from the input pipeline).
    """

    def __init__(self, tensor_name, prefix=None, map_func=None, scale=255):
        """
        Args:
            tensor_name (str): the name of the tensor.
            prefix (str): the filename prefix for saved images. Defaults to the Op name.
            map_func: map the value of the tensor to an image or list of
                images of shape [h, w] or [h, w, c]. If None, will use identity.
            scale (float): a multiplier on pixel values, applied after map_func.
        """
        op_name, self.tensor_name = get_op_tensor_name(tensor_name)
        self.func = map_func
        self.prefix = op_name if prefix is None else prefix
        # The output directory is captured once, at construction time.
        self.log_dir = logger.get_logger_dir()
        self.scale = scale

    def _before_train(self):
        # Look the tensor up by name in the callback's graph.
        self._tensor = self.graph.get_tensor_by_name(self.tensor_name)

    def _trigger(self):
        """Evaluate the tensor, write it to disk as PNG(s) and send it to the monitors."""
        val = self.trainer.sess.run(self._tensor)
        if self.func is not None:
            val = self.func(val)
        # A list, or a 4-D array, is treated as a collection of images.
        if isinstance(val, list) or val.ndim == 4:
            for idx, im in enumerate(val):
                self._dump_image(im, idx)
        else:
            self._dump_image(val)
        self.trainer.monitors.put_image(self.prefix, val)

    def _dump_image(self, im, idx=None):
        """
        Write one image to ``<log_dir>/<prefix>-ep<epoch>[-<idx>].png``.

        Args:
            im: array with 2 or 3 dimensions.
            idx (int or None): position of the image within a batch. ``None``
                means a single image, and no index suffix is appended.
        """
        assert im.ndim in [2, 3], str(im.ndim)
        # Compare against None explicitly: index 0 is a valid batch position and
        # must get its own "-0" suffix instead of being named like a single image.
        suffix = '' if idx is None else '-' + str(idx)
        fname = os.path.join(
            self.log_dir,
            self.prefix + '-ep{:03d}{}.png'.format(self.epoch_num, suffix))
        # Scale, then clamp to the 8-bit range before the uint8 cast.
        res = im * self.scale
        res = np.clip(res, 0, 255)
        cv2.imwrite(fname, res.astype('uint8'))
# cv2 is imported after the class definition; the class only looks the name up
# at call time (inside _dump_image), so a late import is sufficient.
try:
    import cv2
except ImportError:
    # Without cv2, rebind the class name to a placeholder built by
    # create_dummy_class.
    # NOTE(review): presumably the placeholder raises a helpful error naming
    # the missing 'cv2' dependency when used -- confirm in utils.develop.
    from ..utils.develop import create_dummy_class
    DumpTensorAsImage = create_dummy_class('DumpTensorAsImage', 'cv2')  # noqa
# alias
# Bound after the fallback above so that it refers to whichever object
# DumpTensorAsImage ended up being (real class or placeholder).
DumpParamAsImage = DumpTensorAsImage
DumpTensor
=
DumpTensors
DumpTensor
=
DumpTensors
tensorpack/callbacks/misc.py
0 → 100644
View file @
43f7ca75
# -*- coding: utf-8 -*-
# File: misc.py
import
os
import
time
from
collections
import
deque
import
numpy
as
np
from
.base
import
Callback
from
..utils.utils
import
humanize_time_delta
from
..utils
import
logger
__all__
=
[
'SendStat'
,
'InjectShell'
,
'EstimatedTimeLeft'
]
class SendStat(Callback):
    """ An equivalent of :class:`SendMonitorData`, but as a normal callback. """

    def __init__(self, command, names):
        """
        Args:
            command (str): a format string; it is filled in with the latest
                monitor values (keyed by name) and then run with ``os.system``.
            names (str or list): name(s) of the monitored values to look up.
                Anything that is not a list is wrapped into a one-element list.
        """
        self.command = command
        self.names = names if isinstance(names, list) else [names]

    def _trigger(self):
        # Collect the latest value of every requested name from the monitors.
        monitors = self.trainer.monitors
        latest = {}
        for name in self.names:
            latest[name] = monitors.get_latest(name)

        cmd = self.command.format(**latest)
        ret = os.system(cmd)
        if ret == 0:
            return
        # A failing command is only reported; it does not raise.
        logger.error("Command {} failed with ret={}!".format(cmd, ret))
class InjectShell(Callback):
    """
    Let the user pause training and debug it interactively by creating a
    specific file as a signal.

    Every time the callback is triggered it checks whether the file exists,
    and opens an IPython/pdb shell if it does.
    In the shell, `self` is this callback, `self.trainer` is the trainer, and
    from that you can access everything else.
    """

    def __init__(self, file='INJECT_SHELL.tmp', shell='ipython'):
        """
        Args:
           file (str): if this file exists, will open a shell.
           shell (str): one of 'ipython', 'pdb'
        """
        assert shell in ['ipython', 'pdb']
        self._file = file
        self._shell = shell
        logger.info("Create a file '{}' to open {} shell.".format(file, shell))

    def _trigger(self):
        # Nothing to do unless the signal file is present.
        if not os.path.isfile(self._file):
            return
        logger.info("File {} exists, entering shell.".format(self._file))
        self._inject()

    def _inject(self):
        # NOTE(review): unused local, presumably kept so that `trainer` is
        # directly available by name inside the interactive shell -- confirm.
        trainer = self.trainer   # noqa
        if self._shell == 'pdb':
            import pdb   # noqa
            pdb.set_trace()
        elif self._shell == 'ipython':
            import IPython as IP    # noqa
            IP.embed()

    def _after_train(self):
        # Remove the signal file, if it is still there, once training ends.
        if not os.path.isfile(self._file):
            return
        os.unlink(self._file)
class EstimatedTimeLeft(Callback):
    """
    Estimate the time left until completion of training.

    After every epoch, the mean duration of the most recent epochs is
    multiplied by the number of epochs still to run, and the result is logged.
    """

    def __init__(self, last_k_epochs=5):
        """
        Args:
            last_k_epochs (int): Use the time spent on last k epochs to
                estimate total time left.
        """
        # A bounded deque drops the oldest duration automatically.
        self._times = deque(maxlen=last_k_epochs)

    def _before_train(self):
        self._max_epoch = self.trainer.max_epoch
        self._last_time = time.time()

    def _trigger_epoch(self):
        # Read the clock once: using the same timestamp for the duration and
        # for the next epoch's start keeps the measurements contiguous.
        now = time.time()
        self._times.append(now - self._last_time)
        self._last_time = now

        average_epoch_time = np.mean(self._times)
        time_left = (self._max_epoch - self.epoch_num) * average_epoch_time
        # Only report a meaningful estimate: nothing is logged once no epochs
        # remain, or when the average is NaN (no recorded durations).
        if time_left > 0:
            logger.info("Estimated Time Left: " + humanize_time_delta(time_left))
tensorpack/callbacks/stats.py
View file @
43f7ca75
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# File: stats.py
# File: stats.py
# for compatibility only
from
.misc
import
InjectShell
,
SendStat
# noqa
from
.graph
import
DumpParamAsImage
# noqa
import
os
__all__
=
[]
import
numpy
as
np
from
.base
import
Callback
from
..utils
import
logger
from
..tfutils.common
import
get_op_tensor_name
__all__
=
[
'SendStat'
,
'DumpParamAsImage'
,
'InjectShell'
]
class SendStat(Callback):
    """ An equivalent of :class:`SendMonitorData`, but as a normal callback. """

    def __init__(self, command, names):
        """
        Args:
            command (str): a format string; it is filled in with the latest
                monitor values (keyed by name) and then run with ``os.system``.
            names (str or list): name(s) of the monitored values to look up.
                Anything that is not a list is wrapped into a one-element list.
        """
        self.command = command
        self.names = names if isinstance(names, list) else [names]

    def _trigger(self):
        # Collect the latest value of every requested name from the monitors.
        monitors = self.trainer.monitors
        latest = {}
        for name in self.names:
            latest[name] = monitors.get_latest(name)

        cmd = self.command.format(**latest)
        ret = os.system(cmd)
        if ret == 0:
            return
        # A failing command is only reported; it does not raise.
        logger.error("Command {} failed with ret={}!".format(cmd, ret))
class InjectShell(Callback):
    """
    Let the user pause training and debug it interactively by creating a
    specific file as a signal.

    Every time the callback is triggered it checks whether the file exists,
    and opens an IPython/pdb shell if it does.
    In the shell, `self` is this callback, `self.trainer` is the trainer, and
    from that you can access everything else.
    """

    def __init__(self, file='INJECT_SHELL.tmp', shell='ipython'):
        """
        Args:
           file (str): if this file exists, will open a shell.
           shell (str): one of 'ipython', 'pdb'
        """
        assert shell in ['ipython', 'pdb']
        self._file = file
        self._shell = shell
        logger.info("Create a file '{}' to open {} shell.".format(file, shell))

    def _trigger(self):
        # Nothing to do unless the signal file is present.
        if not os.path.isfile(self._file):
            return
        logger.info("File {} exists, entering shell.".format(self._file))
        self._inject()

    def _inject(self):
        # NOTE(review): unused local, presumably kept so that `trainer` is
        # directly available by name inside the interactive shell -- confirm.
        trainer = self.trainer   # noqa
        if self._shell == 'pdb':
            import pdb   # noqa
            pdb.set_trace()
        elif self._shell == 'ipython':
            import IPython as IP    # noqa
            IP.embed()

    def _after_train(self):
        # Remove the signal file, if it is still there, once training ends.
        if not os.path.isfile(self._file):
            return
        os.unlink(self._file)
class DumpParamAsImage(Callback):
    """
    Dump a tensor to image(s) to ``logger.get_logger_dir()`` once triggered.

    Note that it requires the tensor is directly evaluable, i.e. either inputs
    are not its dependency (e.g. the weights of the model), or the inputs are
    feedfree (in which case this callback will take an extra datapoint from the input pipeline).
    """

    def __init__(self, tensor_name, prefix=None, map_func=None, scale=255):
        """
        Args:
            tensor_name (str): the name of the tensor.
            prefix (str): the filename prefix for saved images. Defaults to the Op name.
            map_func: map the value of the tensor to an image or list of
                images of shape [h, w] or [h, w, c]. If None, will use identity.
            scale (float): a multiplier on pixel values, applied after map_func.
        """
        op_name, self.tensor_name = get_op_tensor_name(tensor_name)
        self.func = map_func
        self.prefix = op_name if prefix is None else prefix
        # The output directory is captured once, at construction time.
        self.log_dir = logger.get_logger_dir()
        self.scale = scale

    def _before_train(self):
        # Look the tensor up by name in the callback's graph.
        self._tensor = self.graph.get_tensor_by_name(self.tensor_name)

    def _trigger(self):
        """Evaluate the tensor, write it to disk as PNG(s) and send it to the monitors."""
        val = self.trainer.sess.run(self._tensor)
        if self.func is not None:
            val = self.func(val)
        # A list, or a 4-D array, is treated as a collection of images.
        if isinstance(val, list) or val.ndim == 4:
            for idx, im in enumerate(val):
                self._dump_image(im, idx)
        else:
            self._dump_image(val)
        self.trainer.monitors.put_image(self.prefix, val)

    def _dump_image(self, im, idx=None):
        """
        Write one image to ``<log_dir>/<prefix>-ep<epoch>[-<idx>].png``.

        Args:
            im: array with 2 or 3 dimensions.
            idx (int or None): position of the image within a batch. ``None``
                means a single image, and no index suffix is appended.
        """
        assert im.ndim in [2, 3], str(im.ndim)
        # Compare against None explicitly: index 0 is a valid batch position and
        # must get its own "-0" suffix instead of being named like a single image.
        suffix = '' if idx is None else '-' + str(idx)
        fname = os.path.join(
            self.log_dir,
            self.prefix + '-ep{:03d}{}.png'.format(self.epoch_num, suffix))
        # Scale, then clamp to the 8-bit range before the uint8 cast.
        res = im * self.scale
        res = np.clip(res, 0, 255)
        cv2.imwrite(fname, res.astype('uint8'))
# cv2 is imported after the class definition; the class only looks the name up
# at call time (inside _dump_image), so a late import is sufficient.
try:
    import cv2
except ImportError:
    # Without cv2, rebind the class name to a placeholder built by
    # create_dummy_class.
    # NOTE(review): presumably the placeholder raises a helpful error naming
    # the missing 'cv2' dependency when used -- confirm in utils.develop.
    from ..utils.develop import create_dummy_class
    DumpParamAsImage = create_dummy_class('DumpParamAsImage', 'cv2')  # noqa
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment