Commit ef1b20f9 authored by Yuxin Wu

update on regularizer

parent 8c9c61d3
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
# File: cifar10-resnet-deeper.py # File: cifar10_resnet.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com> # Author: Yuxin Wu <ppwwyyxx@gmail.com>
import tensorflow as tf import tensorflow as tf
...@@ -100,7 +100,6 @@ class Model(ModelDesc): ...@@ -100,7 +100,6 @@ class Model(ModelDesc):
cost = tf.reduce_mean(cost, name='cross_entropy_loss') cost = tf.reduce_mean(cost, name='cross_entropy_loss')
tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost) tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)
# compute the number of failed samples, for ValidationError to use at test time
wrong = prediction_incorrect(logits, label) wrong = prediction_incorrect(logits, label)
nr_wrong = tf.reduce_sum(wrong, name='wrong') nr_wrong = tf.reduce_sum(wrong, name='wrong')
# monitor training error # monitor training error
...@@ -108,9 +107,7 @@ class Model(ModelDesc): ...@@ -108,9 +107,7 @@ class Model(ModelDesc):
MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error')) MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
# weight decay on all W of fc layers # weight decay on all W of fc layers
wd_cost = tf.mul(0.0002, wd_cost = regularize_cost('.*/W', l2_regularizer(0.0002), name='regularize_loss')
regularize_cost('.*/W', tf.nn.l2_loss),
name='regularize_loss')
tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost) tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
add_param_summary([('.*/W', ['histogram', 'sparsity'])]) # monitor W add_param_summary([('.*/W', ['histogram', 'sparsity'])]) # monitor W
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: example_mnist.py # File: mnist_convnet.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com> # Author: Yuxin Wu <ppwwyyxx@gmail.com>
import tensorflow as tf import tensorflow as tf
...@@ -10,7 +10,7 @@ import numpy as np ...@@ -10,7 +10,7 @@ import numpy as np
import os, sys import os, sys
import argparse import argparse
from tensorpack.train import TrainConfig, SimpleTrainer from tensorpack.train import *
from tensorpack.models import * from tensorpack.models import *
from tensorpack.utils import * from tensorpack.utils import *
from tensorpack.utils.symbolic_functions import * from tensorpack.utils.symbolic_functions import *
...@@ -122,6 +122,8 @@ if __name__ == '__main__': ...@@ -122,6 +122,8 @@ if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
if args.gpu: if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
else:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
with tf.Graph().as_default(): with tf.Graph().as_default():
config = get_config() config = get_config()
......
...@@ -44,6 +44,7 @@ class ModelDesc(object): ...@@ -44,6 +44,7 @@ class ModelDesc(object):
""" """
pass pass
# TODO move this to QueueInputTrainer
def get_input_queue(self, input_vars): def get_input_queue(self, input_vars):
""" """
return the queue for input. the dequeued elements will be fed to self.get_cost return the queue for input. the dequeued elements will be fed to self.get_cost
...@@ -51,7 +52,7 @@ class ModelDesc(object): ...@@ -51,7 +52,7 @@ class ModelDesc(object):
when running with multiGPU, queue cannot be None when running with multiGPU, queue cannot be None
""" """
assert input_vars is not None assert input_vars is not None
return tf.FIFOQueue(50, [x.dtype for x in input_vars], name='input_queue') return tf.FIFOQueue(100, [x.dtype for x in input_vars], name='input_queue')
def get_cost(self, input_vars, is_training): def get_cost(self, input_vars, is_training):
assert type(is_training) == bool assert type(is_training) == bool
......
...@@ -8,13 +8,16 @@ import re ...@@ -8,13 +8,16 @@ import re
from ..utils import logger from ..utils import logger
from ..utils import * from ..utils import *
__all__ = ['regularize_cost'] __all__ = ['regularize_cost', 'l2_regularizer', 'l1_regularizer']
@memoized @memoized
def _log_regularizer(name): def _log_regularizer(name):
logger.info("Apply regularizer for {}".format(name)) logger.info("Apply regularizer for {}".format(name))
def regularize_cost(regex, func): l2_regularizer = tf.contrib.layers.l2_regularizer
l1_regularizer = tf.contrib.layers.l1_regularizer
def regularize_cost(regex, func, name=None):
""" """
Apply a regularizer on every trainable variable matching the regex Apply a regularizer on every trainable variable matching the regex
""" """
...@@ -23,11 +26,11 @@ def regularize_cost(regex, func): ...@@ -23,11 +26,11 @@ def regularize_cost(regex, func):
costs = [] costs = []
for p in params: for p in params:
name = p.name para_name = p.name
if re.search(regex, name): if re.search(regex, para_name):
costs.append(func(p)) costs.append(func(p))
_log_regularizer(name) _log_regularizer(para_name)
if not costs: if not costs:
return 0 return 0
return tf.add_n(costs) return tf.add_n(costs, name=name)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment