Commit 1b73d9cc, authored Jan 06, 2017 by Yuxin Wu
add mnist-slim example. Trainer does not automatically summary total cost any more
parent cb99d524
Showing 16 changed files with 86 additions and 69 deletions (+86, -69).
examples/Atari2600/DQN.py                  +1  -0
examples/DisturbLabel/mnist-disturb.py     +1  -1
examples/DoReFa-Net/alexnet-dorefa.py      +1  -1
examples/DoReFa-Net/svhn-digit-dorefa.py   +1  -1
examples/HED/hed.py                        +1  -1
examples/Inception/inception-bn.py         +1  -1
examples/Inception/inceptionv3.py          +1  -1
examples/OpenAIGym/train-atari.py          +2  -1
examples/TIMIT/train-timit.py              +1  -1
examples/char-rnn/char-rnn.py              +1  -0
examples/mnist-convnet.py                  +56 -32
tensorpack/models/batch_norm.py            +1  -0
tensorpack/models/model_desc.py            +15 -24
tensorpack/tfutils/summary.py              +1  -1
tensorpack/train/feedfree.py               +1  -2
tensorpack/train/trainer.py                +1  -2
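As the commit message says, the trainers no longer register the total cost as a moving summary themselves (see the tensorpack/train/trainer.py and tensorpack/train/feedfree.py hunks below); every model now does it explicitly. A minimal sketch of the convention the examples adopt in this commit — the model class, input definition, and layer names here are illustrative, not part of the diff:

    from tensorpack import *
    import tensorflow as tf

    class MyModel(ModelDesc):                      # illustrative model, not from this commit
        def _get_input_vars(self):
            return [InputVar(tf.float32, [None, 784], 'input'),
                    InputVar(tf.int32, [None], 'label')]

        def _build_graph(self, input_vars):
            feature, label = input_vars
            logits = FullyConnected('fc0', feature, out_dim=10, nl=tf.identity)
            cost = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label),
                name='cross_entropy_loss')
            wd_cost = tf.mul(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                             name='regularize_loss')
            self.cost = tf.add_n([cost, wd_cost], name='total_cost')
            # the trainer used to summarize the total cost; now the model registers it itself
            add_moving_summary(cost, wd_cost, self.cost)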
examples/Atari2600/DQN.py
@@ -136,6 +136,7 @@ class Model(ModelDesc):
                                 tf.cast(BATCH_SIZE, tf.float32), name='cost')
         summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                                    ('fc.*/W', ['histogram', 'rms'])])   # monitor all W
+        add_moving_summary(self.cost)

     def update_target_param(self):
         vars = tf.trainable_variables()
examples/DisturbLabel/mnist-disturb.py
@@ -51,9 +51,9 @@ class Model(mnist_example.Model):
         cost = tf.reduce_mean(cost, name='cross_entropy_loss')
         wd_cost = tf.mul(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss), name='regularize_loss')
-        add_moving_summary(cost, wd_cost)
         self.cost = tf.add_n([wd_cost, cost], name='cost')
+        add_moving_summary(cost, wd_cost, self.cost)

 if __name__ == '__main__':
examples/DoReFa-Net/alexnet-dorefa.py
@@ -156,10 +156,10 @@ class Model(ModelDesc):
         # weight decay on all W of fc layers
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(5e-6))
-        add_moving_summary(cost, wd_cost)
         add_param_summary([('.*/W', ['histogram', 'rms'])])
         self.cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(cost, wd_cost, self.cost)

 def get_data(dataset_name):
examples/DoReFa-Net/svhn-digit-dorefa.py
@@ -121,10 +121,10 @@ class Model(ModelDesc):
         cost = tf.reduce_mean(cost, name='cross_entropy_loss')
         # weight decay on all W of fc layers
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))
-        add_moving_summary(cost, wd_cost)
         add_param_summary([('.*/W', ['histogram', 'rms'])])
         self.cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(cost, wd_cost, self.cost)

 def get_config():
examples/HED/hed.py
@@ -89,9 +89,9 @@ class Model(ModelDesc):
         wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
         costs.append(wd_cost)
-        add_moving_summary(costs + [wrong])
         add_param_summary([('.*/W', ['histogram'])])   # monitor W
         self.cost = tf.add_n(costs, name='cost')
+        add_moving_summary(costs + [wrong, self.cost])

     def get_gradient_processor(self):
         return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])]
examples/Inception/inception-bn.py
@@ -114,10 +114,10 @@ class Model(ModelDesc):
         wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(), 80000, 0.7, True)
         wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
-        add_moving_summary(wd_cost)
         add_param_summary([('.*/W', ['histogram'])])   # monitor W
         self.cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(wd_cost, self.cost)

 def get_data(train_or_test):
examples/Inception/inceptionv3.py
@@ -193,9 +193,9 @@ class Model(ModelDesc):
         wd_w = tf.train.exponential_decay(0.00004, get_global_step_var(), 80000, 0.7, True)
         wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
-        add_moving_summary(loss1, loss2, wd_cost)
         self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
+        add_moving_summary(loss1, loss2, wd_cost, self.cost)

 def get_data(train_or_test):
examples/OpenAIGym/train-atari.py
@@ -122,13 +122,14 @@ class Model(ModelDesc):
         pred_reward = tf.reduce_mean(self.value, name='predict_reward')
         advantage = symbf.rms(advantage, name='rms_advantage')
-        summary.add_moving_summary(policy_loss, xentropy_loss, value_loss, pred_reward, advantage)
         entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                        initializer=tf.constant_initializer(0.01), trainable=False)
         self.cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
         self.cost = tf.truediv(self.cost,
                                tf.cast(tf.shape(futurereward)[0], tf.float32),
                                name='cost')
+        summary.add_moving_summary(policy_loss, xentropy_loss, value_loss,
+                                   pred_reward, advantage, self.cost)

     def get_gradient_processor(self):
         return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
examples/TIMIT/train-timit.py
@@ -72,7 +72,7 @@ class Model(ModelDesc):
         err = tf.edit_distance(predictions, label, normalize=True)
         err.set_shape([None])
         err = tf.reduce_mean(err, name='error')
-        summary.add_moving_summary(err)
+        summary.add_moving_summary(err, self.cost)

     def get_gradient_processor(self):
         return [GlobalNormClip(5), SummaryGradient()]
examples/char-rnn/char-rnn.py
@@ -92,6 +92,7 @@ class Model(ModelDesc):
                                             logits, symbolic_functions.flatten(nextinput))
         self.cost = tf.reduce_mean(xent_loss, name='cost')
         summary.add_param_summary([('.*/W', ['histogram'])])   # monitor histogram of all W
+        summary.add_moving_summary(self.cost)

     def get_gradient_processor(self):
         return [GlobalNormClip(5)]
examples/mnist-convnet.py
@@ -5,6 +5,7 @@
 import numpy as np
 import tensorflow as tf
+import tensorflow.contrib.slim as slim
 import os
 import sys
 import argparse
@@ -18,6 +19,7 @@ about 0.6% validation error after 30 epochs.
 from tensorpack import *

 IMAGE_SIZE = 28
+USE_SLIM = False

 class Model(ModelDesc):
@@ -39,15 +41,30 @@ class Model(ModelDesc):
         image = tf.expand_dims(image, 3)

         image = image * 2 - 1   # center the pixels values at zero

+        if USE_SLIM:
+            is_training = get_current_tower_context().is_training
+            with slim.arg_scope([slim.layers.fully_connected],
+                                weights_regularizer=slim.l2_regularizer(1e-5)):
+                l = slim.layers.conv2d(image, 32, [3, 3], scope='conv0')
+                l = slim.layers.max_pool2d(l, [2, 2], scope='pool0')
+                l = slim.layers.conv2d(l, 32, [3, 3], padding='SAME', scope='conv1')
+                l = slim.layers.conv2d(l, 32, [3, 3], scope='conv2')
+                l = slim.layers.max_pool2d(l, [2, 2], scope='pool1')
+                l = slim.layers.conv2d(l, 32, [3, 3], scope='conv3')
+                l = slim.layers.flatten(l, scope='flatten')
+                l = slim.layers.fully_connected(l, 512, scope='fc0')
+                l = slim.layers.dropout(l, is_training=is_training)
+                logits = slim.layers.fully_connected(l, 10, activation_fn=None, scope='fc1')
+        else:
             # The context manager `argscope` sets the default option for all the layers under
-            # this context. Here we use 32 channel convolution with shape 3x3 and
-            # PReLU as nonlinearity.
-            with argscope(Conv2D, kernel_shape=3, nl=PReLU.f, out_channel=32):
+            # this context. Here we use 32 channel convolution with shape 3x3
+            with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
                 """
                 LinearWrap is just a convenient way to compose a linear symbolic graph.
                 You can also do the equivalent in tensorflow style:
                 l = Conv2D('conv0', image)
-                l = MaxPooling('pool0', image, 2)
+                l = MaxPooling('pool0', l, 2)
                 ... """
                 logits = (LinearWrap(image)  # the starting brace is only for line-breaking
@@ -62,8 +79,8 @@ class Model(ModelDesc):
                           .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='prob')   # a Bx10 with probabilities

-        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)   # a vector of length B with loss of each sample
+        # a vector of length B with loss of each sample
+        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
         cost = tf.reduce_mean(cost, name='cross_entropy_loss')   # the average cross-entropy loss

         # compute the "incorrect vector", for the callback ClassificationError to use at validation time
@@ -76,16 +93,23 @@ class Model(ModelDesc):
         train_error = tf.reduce_mean(wrong, name='train_error')
         summary.add_moving_summary(train_error)

+        if not USE_SLIM:
             # Use a regex to find parameters to apply weight decay.
             # Here we apply a weight decay on all W (weight matrix) of all fc layers
             wd_cost = tf.mul(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss), name='regularize_loss')
-            summary.add_moving_summary(cost, wd_cost)
+            self.cost = tf.add_n([wd_cost, cost], name='total_cost')
+            summary.add_moving_summary(cost, wd_cost, self.cost)
+        else:
+            # slim already adds regularization to a collection, no extra handling
+            self.cost = cost
+            summary.add_moving_summary(cost)

         # monitor histogram of all weight (of conv and fc layers) in tensorboard
-        summary.add_param_summary([('.*/W', ['histogram'])])
-        self.cost = tf.add_n([wd_cost, cost], name='cost')
+        summary.add_param_summary([('.*/W', ['histogram', 'rms']),
+                                   ('.*/weights', ['histogram', 'rms'])   # to also work with slim
+                                   ])

 def get_data():
@@ -122,7 +146,7 @@ def get_config():
             InferenceRunner(    # run inference(for validation) after every epoch
                 dataset_test,   # the DataFlow instance used for validation
                 # Calculate both the cost and the error for this DataFlow
-                [ScalarStats('cost'), ClassificationError('incorrect')]),
+                [ScalarStats('cross_entropy_loss'), ClassificationError('incorrect')]),
         ]),
         model=Model(),
         step_per_epoch=step_per_epoch,
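The docstring in the hunk above says LinearWrap is just a convenient way to compose a linear symbolic graph. For reference, a sketch of the equivalent explicit calls, using only the layers visible in this diff (the middle of the chain is elided here and is not part of the commit):

    l = Conv2D('conv0', image)
    l = MaxPooling('pool0', l, 2)
    # ... remaining conv/pool/fc layers elided ...
    logits = FullyConnected('fc1', l, out_dim=10, nl=tf.identity)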
tensorpack/models/batch_norm.py
@@ -112,6 +112,7 @@ def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
     Note:

     * In multi-tower training, only the first training tower maintains a moving average.
+      This is consistent with most frameworks.
     * It automatically selects :meth:`BatchNormV1` or :meth:`BatchNormV2`
       according to availability.
tensorpack/models/model_desc.py
@@ -11,6 +11,7 @@ import six
 from ..utils import logger, INPUT_VARS_KEY
 from ..tfutils.gradproc import CheckGradient
+from ..tfutils.summary import add_moving_summary
 from ..tfutils.tower import get_current_tower_context

 __all__ = ['ModelDesc', 'InputVar', 'ModelFromMetaGraph']
@@ -113,42 +114,32 @@ Use _build_graph(self, input_vars) and get_current_tower_context().is_training i
     def get_cost(self):
         """
         Return the cost tensor in the graph. Called by some of the :class:`tensorpack.train.Trainer` which
-        assumes single-cost models. Apply tfSlim modifications.
-        """
+        assumes single-cost models.

-        # current scope
-        scope = tf.get_variable_scope()
+        This function also apply tfslim collections to the cost automatically, including
+        ``tf.GraphKeys.REGULARIZATION_LOSSES`` and
+        ``tf.GraphKeys.UPDATE_OPS``. This is because slim users would expect
+        the regularizer being automatically applied once used in slim layers.
+        """
         # the model cost so far
         cost = self._get_cost()

-        # In contrast to this lib, when using tfSlim the user expect
-        # "with slim.arg_scope([...], weights_regularizer=slim.l2_regularizer(0.001)"
-        # to regularize these layers automatically. Note, this already contains the multiplier!
-        regulization_losses = 0
-        # try to prevent regEx error, iff scope name is empty ("")
-        try:
-            regulization_losses = set(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope=scope))
-        except Exception:
-            regulization_losses = set(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-        # TODO: check if "scope=scope" should be used here too
+        regulization_losses = set(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
         if len(regulization_losses) > 0:
-            cost += tf.add_n(regulization_losses, name="regularize_loss")
+            reg_loss = tf.add_n(list(regulization_losses), name="regularize_loss")
+            cost = tf.add(reg_loss, cost, name='total_cost')
+            add_moving_summary(reg_loss, cost)

-        # As these batch-norm statistics quickly accumulate, there is no significant loss of accuracy
-        # if only the main tower handles all batch-normalization updates, which are then shared across
-        # the towers
         ctx = get_current_tower_context()
         if ctx is not None and ctx.is_main_training_tower:
-            # if there is no entry in tf.GraphKeys.UPDATE_OPS, then there is a regEx exception
-            try:
-                non_grad_updates = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=scope))
-            except Exception:
-                non_grad_updates = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
+            non_grad_updates = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
             if non_grad_updates:
                 with tf.control_dependencies(non_grad_updates):
-                    barrier = tf.control_flow_ops.no_op(name='batchnorm_barrier')
+                    barrier = tf.control_flow_ops.no_op(name='update_ops_barrier')
                 cost = tf.control_flow_ops.with_dependencies([barrier], cost)
         return cost
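The updated get_cost() docstring above says slim users expect layer regularizers to be applied automatically. A standalone sketch of the collection mechanism it now relies on — the placeholder and layer here are hypothetical, not part of this commit:

    import tensorflow as tf
    import tensorflow.contrib.slim as slim

    x = tf.placeholder(tf.float32, [None, 784], name='x')
    with slim.arg_scope([slim.layers.fully_connected],
                        weights_regularizer=slim.l2_regularizer(1e-5)):
        y = slim.layers.fully_connected(x, 10, activation_fn=None, scope='fc')

    # slim puts one L2 term per regularized layer into this collection;
    # get_cost() above sums them into 'regularize_loss', adds the result to the
    # model cost as 'total_cost', and registers both with add_moving_summary.
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_loss = tf.add_n(reg_losses, name='regularize_loss')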
tensorpack/tfutils/summary.py
@@ -116,7 +116,7 @@ def summary_moving_average(tensors=None):
     :returns: a op to maintain these average.
     """
     if tensors is None:
-        tensors = tf.get_collection(MOVING_SUMMARY_VARS_KEY)
+        tensors = set(tf.get_collection(MOVING_SUMMARY_VARS_KEY))
     # TODO will produce tower0/xxx. not elegant
     with tf.name_scope(None):
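Wrapping the collection in set() deduplicates tensors that were registered more than once, presumably so only one moving-average op is created per tensor now that both models and other callers may add the same cost. A hypothetical illustration:

    add_moving_summary(cost)
    add_moving_summary(cost)    # accidentally registered twice
    tensors = set(tf.get_collection(MOVING_SUMMARY_VARS_KEY))    # contains `cost` only once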
tensorpack/train/feedfree.py
@@ -9,7 +9,7 @@ from ..utils import logger
 from ..tfutils import get_global_step_var
 from ..tfutils.tower import TowerContext
 from ..tfutils.gradproc import apply_grad_processors
-from ..tfutils.summary import summary_moving_average, add_moving_summary
+from ..tfutils.summary import summary_moving_average
 from .input_data import QueueInput, FeedfreeInput
 from .base import Trainer
@@ -51,7 +51,6 @@ class SingleCostFeedfreeTrainer(FeedfreeTrainerBase):
             cost_var,
             gate_gradients=tf.train.Optimizer.GATE_NONE,
             colocate_gradients_with_ops=False)
-        add_moving_summary(cost_var)
         return cost_var, grads

     def run_step(self):
tensorpack/train/trainer.py
@@ -9,7 +9,7 @@ from .base import Trainer
 from ..utils import SUMMARY_BACKUP_KEYS, PREDICT_TOWER
 from ..tfutils import (get_tensors_by_names, freeze_collection,
                        get_global_step_var, TowerContext)
-from ..tfutils.summary import summary_moving_average, add_moving_summary
+from ..tfutils.summary import summary_moving_average
 from ..predict import OnlinePredictor, build_multi_tower_prediction_graph
 from ..tfutils.gradproc import apply_grad_processors
 from .input_data import FeedInput
@@ -82,7 +82,6 @@ class SimpleTrainer(Trainer):
         with TowerContext('', is_training=True):
             model.build_graph(self.input_vars)
             cost_var = model.get_cost()
-        add_moving_summary(cost_var)
         grads = self.config.optimizer.compute_gradients(cost_var)
         grads = apply_grad_processors(grads,