Commit 076a728f, authored Feb 12, 2017 by Yuxin Wu
Commit message: some docs update & add optimizer with gradproc
Parent: 3f91978c

Showing 19 changed files, with 145 additions and 71 deletions.
Changed files:

  examples/A3C-Gym/README.md                        +1   -0
  examples/A3C-Gym/train-atari.py                   +2   -2
  examples/CTC-TIMIT/train-timit.py                 +1   -1
  examples/Char-RNN/char-rnn.py                     +1   -1
  examples/DeepQNetwork/DQN.py                      +2   -2
  examples/GAN/InfoGAN-mnist.py                     +5   -7
  examples/HED/hed.py                               +2   -1
  examples/PennTreebank/PTB-LSTM.py                 +1   -1
  examples/ResNet/README.md                         +4   -0
  examples/ResNet/cifar10-resnet.py                 +3   -0
  examples/ResNet/imagenet-resnet.py                +0   -5
  examples/SpatialTransformer/mnist-addition.py     +2   -2
  examples/mnist-convnet.py                         +4   -2
  tensorpack/models/model_desc.py                   +28  -42
  tensorpack/tfutils/__init__.py                    +0   -1
  tensorpack/tfutils/gradproc.py                    +1   -1
  tensorpack/tfutils/modelutils.py                  +37  -2
  tensorpack/tfutils/optimizer.py  (new file)       +49  -0
  tensorpack/train/multigpu.py                      +2   -1

examples/A3C-Gym/README.md

@@ -19,6 +19,7 @@ Also note that multi-GPU doesn't give you obvious speedup here,
 because the bottleneck in this implementation is not computation but data.

 Some practicical notes:
+1. On machines without huge memory, enabling tcmalloc may keep training throughput more stable.
 2. Occasionally, processes may not get terminated completely. It is suggested to use
    `systemd-run` to run any multiprocess Python program to get a cgroup dedicated for the task.

examples/A3C-Gym/train-atari.py

@@ -133,8 +133,8 @@ class Model(ModelDesc):
                                    value_loss, pred_reward, advantage, self.cost)

     def get_gradient_processor(self):
-        return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
-                SummaryGradient()]
+        return [gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
+                gradproc.SummaryGradient()]


 class MySimulatorMaster(SimulatorMaster, Callback):

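For readers unfamiliar with tensorpack's gradient processors, here is a minimal sketch in plain TF 1.x (not tensorpack internals) of what a `MapGradient`-style processor does: apply a transform such as `tf.clip_by_average_norm` to every gradient in a `(grad, var)` list before the optimizer consumes it. The toy variable, cost, and optimizer choice are assumptions for illustration only.

```python
import tensorflow as tf

def map_gradients(grads_and_vars, fn):
    # Transform each gradient, leave the variables untouched.
    return [(fn(g), v) for g, v in grads_and_vars if g is not None]

# Toy variable/cost just so the sketch is self-contained.
w = tf.get_variable('w', shape=[10])
cost = tf.reduce_sum(tf.square(w))

opt = tf.train.AdamOptimizer(1e-3)
grads_and_vars = opt.compute_gradients(cost)
clipped = map_gradients(grads_and_vars,
                        lambda g: tf.clip_by_average_norm(g, 0.1))
train_op = opt.apply_gradients(clipped)
```
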
examples/CTC-TIMIT/train-timit.py

@@ -74,7 +74,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(err, self.cost)

     def get_gradient_processor(self):
-        return [GlobalNormClip(5), SummaryGradient()]
+        return [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()]


 def get_data(path, isTrain, stat_file):

examples/Char-RNN/char-rnn.py

@@ -106,7 +106,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(self.cost)

     def get_gradient_processor(self):
-        return [GlobalNormClip(5)]
+        return [gradproc.GlobalNormClip(5)]


 def get_config():

examples/DeepQNetwork/DQN.py

@@ -150,8 +150,8 @@ class Model(ModelDesc):
         return tf.group(*ops, name='update_target_network')

     def get_gradient_processor(self):
-        return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
-                SummaryGradient()]
+        return [gradproc.GlobalNormalClip(10),
+                gradproc.SummaryGradient()]


 def get_config():

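The old code clipped each gradient separately by calling `tf.clip_by_global_norm` on a single-element list; the new processor instead clips all gradients jointly against one norm budget. A hedged plain-TF 1.x illustration of that joint clipping (the toy variables, cost, optimizer, and the threshold of 10 are illustrative, not tensorpack internals):

```python
import tensorflow as tf

# Toy setup so the sketch is self-contained.
w = tf.get_variable('w', shape=[10])
b = tf.get_variable('b', shape=[10])
cost = tf.reduce_sum(tf.square(w - b))

opt = tf.train.RMSPropOptimizer(1e-3)
grads_and_vars = opt.compute_gradients(cost)
grads, varlist = zip(*grads_and_vars)

# Rescale ALL gradients together so their combined global norm is at most 10.
clipped, global_norm = tf.clip_by_global_norm(list(grads), 10.0)
train_op = opt.apply_gradients(list(zip(clipped, varlist)))
```
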
examples/GAN/InfoGAN-mnist.py

@@ -96,17 +96,13 @@ class Model(GANModelDesc):
         fake_sample_viz = tf.cast((fake_sample + 1) * 128.0, tf.uint8, name='viz')
         tf.summary.image('gen', fake_sample_viz, max_outputs=30)

-        # TODO investigate how bn stats should be updated across two discrim
+        # may need to investigate how bn stats should be updated across two discrim
         with tf.variable_scope('discrim'):
             real_pred, _ = self.discriminator(real_sample)
         with tf.variable_scope('discrim', reuse=True):
             fake_pred, dist_param = self.discriminator(fake_sample)

-        # post-process output vector from discriminator to become valid
-        # distribution parameters
-        encoder_activation = self.factors.encoder_activation(dist_param)
-
         """
         Mutual information between x (i.e. zc in this case) and some
         information s (the generated samples in this case):

@@ -130,6 +126,8 @@ class Model(GANModelDesc):
         # Adding this term may make the curve less stable because the
         # entropy estimated from the samples is not the true value.

+        # post-process output vector from discriminator to obtain valid distribution parameters
+        encoder_activation = self.factors.encoder_activation(dist_param)
         cond_ents = self.factors.entropy(zc, encoder_activation)
         cond_entropy = tf.add_n(cond_ents, name="total_conditional_entropy")

@@ -139,7 +137,7 @@ class Model(GANModelDesc):
         # default GAN objective
         self.build_losses(real_pred, fake_pred)

-        # subtract mutual information for latent factores (we want to maximize them)
+        # subtract mutual information for latent factors (we want to maximize them)
         self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
         self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')

@@ -150,7 +148,7 @@ class Model(GANModelDesc):
     def get_gradient_processor_g(self):
         # generator learns 5 times faster
-        return [CheckGradient(), ScaleGradient(('.*', 5), log=False)]
+        return [gradproc.ScaleGradient(('.*', 5), log=False)]


 def get_data():

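As background for the `MI` term subtracted from both losses: the mutual-information docstring in this file alludes to the standard InfoGAN variational lower bound (Chen et al., 2016). In the usual notation (not this file's exact wording):

I(c; G(z, c)) \;\ge\; \mathbb{E}_{c \sim P(c),\, x \sim G(z, c)}\big[\log Q(c \mid x)\big] + H(c)

Here Q(c | x) is the auxiliary distribution read off the discriminator head (the `dist_param` / `encoder_activation` above), H(c) is the entropy of the latent code, and the conditional-entropy term is what `cond_entropy` estimates from samples.
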
examples/HED/hed.py

@@ -93,7 +93,8 @@ class Model(ModelDesc):
         add_moving_summary(costs + [wrong, self.cost])

     def get_gradient_processor(self):
-        return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])]
+        return [gradproc.ScaleGradient([
+            ('convfcweight.*', 0.1), ('conv5_.*', 5)])]


 def get_data(name):

examples/PennTreebank/PTB-LSTM.py

@@ -101,7 +101,7 @@ class Model(ModelDesc):
                 s[1].h.assign(z))

     def get_gradient_processor(self):
-        return [GlobalNormClip(5)]
+        return [gradproc.GlobalNormClip(5)]


 def get_config():

examples/ResNet/README.md

@@ -12,6 +12,10 @@ Models can be [downloaded here](https://goo.gl/6XjK9V).
 | ResNet 50  | 7.13% | 24.12% |
 | ResNet 101 | 6.54% | 22.89% |

+```bash
+./imagenet-resnet.py --data /path/to/ILSVRC --gpu 0,1,2,3 -d 18
+```
+
 ## load-resnet.py

examples/ResNet/cifar10-resnet.py

@@ -25,6 +25,9 @@ n=5, about 7.1% val error after 67k steps (8.6 step/s)
 n=18, about 5.95% val error after 80k steps (2.6 step/s)
 n=30: a 182-layer network, about 5.6% val error after 51k steps (1.55 step/s)
 This model uses the whole training set instead of a train-val split.
+
+To train:
+    ./cifar10-resnet.py --gpu 0,1
 """

 BATCH_SIZE = 128

examples/ResNet/imagenet-resnet.py

@@ -17,11 +17,6 @@ from tensorpack.utils.stats import RatioCounter
 from tensorpack.tfutils.symbolic_functions import *
 from tensorpack.tfutils.summary import *

-"""
-Training code of Pre-Activation version of ResNet on ImageNet.
-It mainly follows the setup in fb.resnet.torch, and get similar performance.
-"""
-
 TOTAL_BATCH_SIZE = 256
 INPUT_SHAPE = 224
 DEPTH = None

examples/SpatialTransformer/mnist-addition.py

@@ -86,8 +86,8 @@ class Model(ModelDesc):
         self.cost = tf.add_n([wd_cost, cost], name='cost')

     def get_gradient_processor(self):
-        return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
-                ScaleGradient(('STN.*', 0.1)), SummaryGradient()]
+        return [gradproc.ScaleGradient(('STN.*', 0.1)),
+                gradproc.SummaryGradient()]


 def get_data(isTrain):

examples/mnist-convnet.py

@@ -24,8 +24,10 @@ USE_SLIM = False
 class Model(ModelDesc):

     def _get_inputs(self):
-        """Define all the input variables (with type, shape, name) that'll be
-        fed into the graph to produce a cost. """
+        """
+        Define all the inputs (with type, shape, name) that
+        the graph will need.
+        """
         return [InputDesc(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
                 InputDesc(tf.int32, (None,), 'label')]

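For context, each `InputDesc` above declares the dtype, shape and name of one input. A hedged sketch of the raw TF 1.x placeholders tensorpack would build from these declarations (IMAGE_SIZE = 28 is assumed here, as usual for MNIST; it is not shown in this diff):

```python
import tensorflow as tf

IMAGE_SIZE = 28   # assumed value for MNIST, not taken from this diff

image = tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), name='input')
label = tf.placeholder(tf.int32, (None,), name='label')
```
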
tensorpack/models/model_desc.py

@@ -9,9 +9,9 @@ import pickle
 import six

 from ..utils import logger, INPUTS_KEY
 from ..utils.argtools import memoized
-from ..tfutils.gradproc import CheckGradient
+from ..tfutils.modelutils import apply_slim_collections
 from ..tfutils.summary import add_moving_summary
 from ..tfutils.tower import get_current_tower_context

 __all__ = ['InputDesc', 'InputVar', 'ModelDesc', 'ModelFromMetaGraph']

@@ -41,8 +41,10 @@ class InputDesc(object):
         return pickle.loads(buf)


-# TODO print warning?
-InputVar = InputDesc
+class InputVar(InputDesc):
+    def __init__(self, *args, **kwargs):
+        logger.warn("[Deprecated] InputVar was renamed to InputDesc!")
+        super(InputVar, self).__init__(*args, **kwargs)


 @six.add_metaclass(ABCMeta)

@@ -50,6 +52,7 @@ class ModelDesc(object):
     """ Base class for a model description """

+    # inputs:
+    @memoized
     def get_reused_placehdrs(self):
         """
         Create or return (if already created) raw input TF placeholders in the graph.

@@ -57,11 +60,7 @@ class ModelDesc(object):
         Returns:
             list[tf.Tensor]: the list of input placeholders in the graph.
         """
-        if hasattr(self, 'reuse_input_vars'):
-            return self.reuse_input_vars
-        ret = self.build_placeholders()
-        self.reuse_input_vars = ret
-        return ret
+        return self.build_placeholders()

     def get_input_vars(self):
         # this wasn't a public API anyway

@@ -70,7 +69,7 @@ class ModelDesc(object):
     def build_placeholders(self, prefix=''):
         """
-        For each input, create new placeholders with optional prefix and
+        For each InputDesc, create new placeholders with optional prefix and
         return them. Useful when building new towers.

         Returns:

@@ -105,8 +104,6 @@ class ModelDesc(object):
     def _get_input_vars(self):  # keep backward compatibility
         raise NotImplementedError()

+    # build graph:
     def build_graph(self, model_inputs):
         """
         Build the whole symbolic graph.

@@ -121,46 +118,35 @@ class ModelDesc(object):
     def _build_graph(self, inputs):
         pass

+    # set cost. Only for single-cost model.
     def get_cost(self):
         """
-        Return the cost tensor in the graph. Called by some of the :class:`tensorpack.train.Trainer` which
-        assumes single-cost models.
+        Return the cost tensor in the graph.
+        Used by some of the tensorpack :class:`Trainer` which assumes single-cost models.
         You can ignore this method if you use your own trainer with more than one cost.

-        This function also apply tfslim collections to the cost automatically, including
-        ``tf.GraphKeys.REGULARIZATION_LOSSES`` and ``tf.GraphKeys.UPDATE_OPS``. This is because slim users would expect
-        the regularizer being automatically applied once used in slim layers.
+        It calls :meth:`ModelDesc._get_cost()` which by default returns
+        ``self.cost``. You can override :meth:`_get_cost()` if needed.
+
+        This function also applies tfslim collections to the cost automatically,
+        including ``tf.GraphKeys.REGULARIZATION_LOSSES`` and ``tf.GraphKeys.UPDATE_OPS``.
+        This is because slim users would expect the regularizer being automatically applied once used in slim layers.
         """
         cost = self._get_cost()
-        regulization_losses = set(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-        if len(regulization_losses) > 0:
-            reg_loss = tf.add_n(list(regulization_losses), name="regularize_loss")
-            cost = tf.add(reg_loss, cost, name='total_cost')
-            add_moving_summary(reg_loss, cost)
-
-        # As these batch-norm statistics quickly accumulate, there is no significant loss of accuracy
-        # if only the main tower handles all batch-normalization updates, which are then shared across
-        # the towers
-        ctx = get_current_tower_context()
-        if ctx is not None and ctx.is_main_training_tower:
-            non_grad_updates = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
-            if non_grad_updates:
-                logger.info("Apply UPDATE_OPS collection on cost.")
-                with tf.control_dependencies(non_grad_updates):
-                    cost = tf.identity(cost)
-        return cost
+        return apply_slim_collections(cost)

     def _get_cost(self, *args):
-        # the model cost so far
         return self.cost

+    # set optimizer. only for single-optimizer model.
+    @memoized
     def get_optimizer(self):
         """
         Return the optimizer used in the task.
+        Used by some of the tensorpack :class:`Trainer` which only uses a single optimizer.
+        You can ignore this method if you use your own trainer with more than one optimizers.
+
+        Users of :class:`ModelDesc` will need to implement `_get_optimizer()`,
+        which will only be called once per each model.

         Returns:
             a :class:`tf.train.Optimizer` instance.
         """

@@ -170,7 +156,7 @@ class ModelDesc(object):
             raise NotImplementedError()

     def get_gradient_processor(self):
-        """ Return a list of :class:`tensorpack.tfutils.GradientProcessor`.
+        """ (Deprecated)
+        Return a list of :class:`tensorpack.tfutils.GradientProcessor`.
         They will be executed by the trainer in the given order.
         """
         return [  # SummaryGradient(),

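To make the new single-cost contract concrete, here is a hedged sketch of a minimal `ModelDesc` subclass under these changes: `_build_graph` sets `self.cost`, the default `_get_cost()` returns it, and `get_cost()` wraps it with `apply_slim_collections()`. The import path, layer choices, and loss below are illustrative, not taken from this commit.

```python
import tensorflow as tf
from tensorpack import ModelDesc, InputDesc   # assumed top-level import path

class MyModel(ModelDesc):
    def _get_inputs(self):
        # dtype, shape and name of each input the graph will need
        return [InputDesc(tf.float32, (None, 28, 28), 'input'),
                InputDesc(tf.int32, (None,), 'label')]

    def _build_graph(self, inputs):
        image, label = inputs
        flat = tf.reshape(image, [-1, 28 * 28])
        logits = tf.layers.dense(flat, 10)
        # Setting self.cost is enough: the default _get_cost() returns it and
        # get_cost() applies the slim collections on top.
        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits),
            name='cross_entropy_loss')

    def _get_optimizer(self):
        # Single optimizer for the single-optimizer trainers.
        return tf.train.AdamOptimizer(1e-3)
```
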
tensorpack/tfutils/__init__.py

@@ -19,7 +19,6 @@ def _global_import(name):

 _TO_IMPORT = set([
     'common',
     'sessinit',
-    'gradproc',
     'argscope',
     'tower',
 ])

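This removal presumably explains the example changes above: `gradproc` is no longer wildcard-imported into the `tensorpack.tfutils` namespace, so its classes are referenced through the module instead. A minimal sketch of the new usage pattern:

```python
from tensorpack.tfutils import gradproc

# Module-qualified gradient processors, matching the updated examples.
processors = [gradproc.GlobalNormClip(5),
              gradproc.SummaryGradient()]
```
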
tensorpack/tfutils/gradproc.py

@@ -20,7 +20,7 @@ def apply_grad_processors(grads, gradprocs):
     """
     Args:
         grads (list): list of (grad, var).
-        gradprocs (list): list of :class:`GradientProcessor` instances.
+        gradprocs (list[GradientProcessor]): gradient processors to apply.

     Returns:
         list: list of (grad, var) went through the processors.
     """

tensorpack/tfutils/modelutils.py

 # -*- coding: UTF-8 -*-
 # File: modelutils.py
-# Author: Yuxin Wu <ppwwyyxx@gmail.com>
+# Author: tensorpack contributors

 import tensorflow as tf
 from termcolor import colored

 from ..utils import logger
+from .summary import add_moving_summary
+from .tower import get_current_tower_context

-__all__ = ['describe_model', 'get_shape_str']
+__all__ = ['describe_model', 'get_shape_str', 'apply_slim_collections']


 def describe_model():

@@ -46,3 +48,36 @@ def get_shape_str(tensors):
         assert isinstance(tensors, (tf.Tensor, tf.Variable)), "Not a tensor: {}".format(type(tensors))
         shape_str = str(tensors.get_shape().as_list())
     return shape_str
+
+
+def apply_slim_collections(cost):
+    """
+    Apply slim collections to the cost, including:
+
+    1. adding the cost with the regularizers in ``tf.GraphKeys.REGULARIZATION_LOSSES``.
+    2. make the cost depend on ``tf.GraphKeys.UPDATE_OPS``.
+
+    Args:
+        cost: a scalar tensor
+
+    Return:
+        a scalar tensor, the cost after applying the collections.
+    """
+    regulization_losses = set(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+    if len(regulization_losses) > 0:
+        logger.info("Applying REGULARIZATION_LOSSES on cost.")
+        reg_loss = tf.add_n(list(regulization_losses), name="regularize_loss")
+        cost = tf.add(reg_loss, cost, name='total_cost')
+        add_moving_summary(reg_loss, cost)
+
+    # As these batch-norm statistics quickly accumulate, there is no significant loss of accuracy
+    # if only the main tower handles all batch-normalization updates, which are then shared across
+    # the towers
+    ctx = get_current_tower_context()
+    if ctx is not None and ctx.is_main_training_tower:
+        non_grad_updates = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
+        if non_grad_updates:
+            logger.info("Applying UPDATE_OPS collection on cost.")
+            with tf.control_dependencies(non_grad_updates):
+                cost = tf.identity(cost, name='cost_with_update')
+    return cost

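A hedged usage sketch of the new helper: given a scalar cost built with slim-style regularizers, `apply_slim_collections` folds `REGULARIZATION_LOSSES` into the cost and, inside the main training tower, makes it depend on `UPDATE_OPS`. The toy variable and regularizer below are illustrative.

```python
import tensorflow as tf
from tensorpack.tfutils.modelutils import apply_slim_collections

# A toy cost with an L2 regularizer registered in REGULARIZATION_LOSSES,
# so the helper has something to fold in.
w = tf.get_variable('w', shape=[10],
                    regularizer=tf.contrib.layers.l2_regularizer(1e-4))
raw_cost = tf.reduce_sum(tf.square(w), name='raw_cost')

total_cost = apply_slim_collections(raw_cost)
# Outside a training tower only the regularization term is added; inside the
# main tower, UPDATE_OPS (e.g. batch-norm moving averages) are attached too.
```
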
tensorpack/tfutils/optimizer.py  (new file, mode 100644)

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: optimizer.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>

import tensorflow as tf
from .gradproc import apply_grad_processors as apply_gradproc


class ProxyOptimizer(tf.train.Optimizer):
    def __init__(self, opt):
        self._opt = opt

    def compute_gradients(self, *args, **kwargs):
        return self._opt.compute_gradients(*args, **kwargs)

    def get_slot(self, *args, **kwargs):
        return self._opt.get_slot(*args, **kwargs)

    def get_slot_names(self, *args, **kwargs):
        return self._opt.get_slot_names(*args, **kwargs)

    def apply_gradients(self, *args, **kwargs):
        return self._opt.apply_gradients(*args, **kwargs)


def apply_grad_processors(opt, gradprocs):
    """
    Wrapper around optimizers to apply gradient processors.

    Args:
        opt (tf.train.Optimizer):
        gradprocs (list[GradientProcessor]): gradient processors to add to the
            optimizer.

    Returns:
        a :class:`tf.train.Optimizer` instance which runs the gradient
        processors before updating the variables.
    """
    class _ApplyGradientProcessor(ProxyOptimizer):
        def __init__(self, opt, gradprocs):
            self._gradprocs = gradprocs
            super(_ApplyGradientProcessor, self).__init__(opt)

        def apply_gradients(self, grads_and_vars,
                            global_step=None, name=None):
            g = apply_gradproc(grads_and_vars, self._gradprocs)
            return self._opt.apply_gradients(g, global_step, name)

    return _ApplyGradientProcessor(opt, gradprocs)

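This is the "optimizer with gradproc" from the commit message. A hedged usage sketch: wrap any `tf.train.Optimizer` so the gradient processors run inside `apply_gradients()`, keeping the trainer unaware of gradient post-processing. The base optimizer, processor choices, and toy cost below are illustrative.

```python
import tensorflow as tf
from tensorpack.tfutils import gradproc
from tensorpack.tfutils.optimizer import apply_grad_processors

# Toy cost so the sketch is self-contained.
w = tf.get_variable('w', shape=[10])
cost = tf.reduce_sum(tf.square(w))

base_opt = tf.train.AdamOptimizer(1e-3)
opt = apply_grad_processors(base_opt,
                            [gradproc.GlobalNormClip(5),
                             gradproc.SummaryGradient()])

grads_and_vars = opt.compute_gradients(cost)
train_op = opt.apply_gradients(grads_and_vars)  # processors run here, before the update
```
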
tensorpack/train/multigpu.py

@@ -82,10 +82,11 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
         Args:
             config, input_queue: same as in :class:`QueueInputTrainer`.
             average_cost (bool): average the cost (instead of gradients) from
-                each tower and did backprop only once. Should no make
+                each tower and did backprop only once. This option should make no
                 difference mathematically, but may affect speed.
         """
         if config.dataflow is not None:
+            # use queueinput by default. May need to avoid this in the future (when more input type is available)
             self._input_method = QueueInput(config.dataflow, input_queue)
         else:
             self._input_method = config.data