Commit a4371695 authored by Yuxin Wu

docs

parent e1fbdca1
......@@ -15,38 +15,41 @@ from .utils import logger
from .tfutils.modelutils import describe_model
from .dataflow import DataFlow, BatchData
__all__ = ['PredictConfig', 'DatasetPredictor', 'get_predict_func']
class PredictConfig(object):
def __init__(self, **kwargs):
"""
The config used by `get_predict_func`
Args:
session_config: a tf.ConfigProto instance to instantiate the
The config used by `get_predict_func`.
:param session_config: a `tf.ConfigProto` instance to instantiate the
session. Defaults to a session running 1 GPU.
session_init: a tensorpack.utils.sessinit.SessionInit instance to
:param session_init: a `utils.sessinit.SessionInit` instance to
initialize variables of a session.
input_data_mapping: Decide the mapping from each component in data
:param input_data_mapping: Decides the mapping from each component of a datapoint
to an input tensor, since you may not need all input variables
of the graph to run the graph for prediction (for example,
the `label` input is not used if you only need the probability
distribution).
It should be a list with size=len(one_data_point),
It should be a list with size=len(data_point),
where each element is the index of the input variable that the
corresponding component of the datapoint should be fed into.
If not given, defaults to ``range(len(input_vars))``.
For example, with image classification task, the testing
For example, in an image classification task, the testing
dataset only provides datapoints of images (no labels). When
the input variables of the model is:
the input variables of the model are: ::
input_vars: [image_var, label_var]
the mapping should look like:
input_data_mapping: [0]
If this argument is not set in this case, the inputs and the data points won't be aligned.
model: a ModelDesc instance
output_var_names: a list of names of the output variable to predict, the
the mapping should look like: ::
input_data_mapping: [0] # the first component in a datapoint should map to `image_var`
:param model: a `ModelDesc` instance
:param output_var_names: a list of names of the output variables to predict; these
variables can be any computable tensors in the graph.
if None, will only calculate the cost returned by `get_model_func`.
Predict only specific output (instead of the cost)
might be faster and might require only some of the input variables.
Predicting a specific output might not require all input variables.
"""
def assert_type(v, tp):
assert isinstance(v, tp), v.__class__
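For instance, the config above might be assembled as follows (a minimal sketch; `MyModel`, the checkpoint path, and the output name are hypothetical): ::

    config = PredictConfig(
        session_init=SaverRestore('train_log/model-10000'),  # hypothetical checkpoint
        model=MyModel(),              # a ModelDesc whose inputs are [image_var, label_var]
        input_data_mapping=[0],       # each datapoint's only component feeds image_var
        output_var_names=['prob:0'])  # any computable tensor names in the graph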
......@@ -55,18 +58,14 @@ class PredictConfig(object):
self.session_init = kwargs.pop('session_init')
self.model = kwargs.pop('model')
self.input_data_mapping = kwargs.pop('input_data_mapping', None)
self.output_var_names = kwargs.pop('output_var_names', None)
self.output_var_names = kwargs.pop('output_var_names')
assert len(kwargs) == 0, 'Unknown arguments: {}'.format(str(kwargs.keys()))
def get_predict_func(config):
"""
Args:
config: a PredictConfig
Returns:
A prediction function that takes a list of inputs value, and return
one/a list of output values.
If `output_var_names` is set, then the prediction function will
return a list of output values. If not, will return a cost.
:param config: a `PredictConfig` instance.
:returns: A prediction function that takes a list of input values, and returns
a list of output values defined in ``config.output_var_names``.
"""
output_var_names = config.output_var_names
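Calling the returned function is then a one-liner (a sketch, reusing the hypothetical `config` above): ::

    predict = get_predict_func(config)
    outputs = predict([img])   # one output value per name in config.output_var_names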
......@@ -106,10 +105,14 @@ def get_predict_func(config):
PredictResult = namedtuple('PredictResult', ['input', 'output'])
class DatasetPredictor(object):
"""
Run the predict_config on a given `DataFlow`.
"""
def __init__(self, predict_config, dataset, batch=0):
"""
A predictor with the given predict_config, run on the given dataset
if batch is larger than zero, the dataset will be batched
:param predict_config: a `PredictConfig` instance.
:param dataset: a `DataFlow` instance.
:param batch: if batch > 0, the dataset will be batched before prediction.
"""
assert isinstance(dataset, DataFlow)
self.ds = dataset
......@@ -118,11 +121,14 @@ class DatasetPredictor(object):
self.predict_func = get_predict_func(predict_config)
def get_result(self):
""" a generator to return prediction for each data"""
""" A generator to produce prediction for each data"""
with tqdm(total=self.ds.size()) as pbar:
for dp in self.ds.get_data():
yield PredictResult(dp, self.predict_func(dp))
pbar.update()
def get_all_result(self):
"""
Run over the dataset and return a list of all predictions.
"""
return list(self.get_result())
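Typical use (a sketch; `test_ds` stands for any `DataFlow` of test datapoints): ::

    pred = DatasetPredictor(config, test_ds, batch=128)
    for res in pred.get_result():     # res is a PredictResult(input, output)
        handle(res.output)            # hypothetical per-datapoint processing
    # or collect everything at once:
    results = pred.get_all_result()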
......@@ -5,13 +5,13 @@
from pkgutil import walk_packages
import os
def global_import(name):
def _global_import(name):
p = __import__(name, globals(), None, level=1)
lst = p.__all__ if '__all__' in dir(p) else dir(p)
for k in lst:
globals()[k] = p.__dict__[k]
global_import('sessinit')
global_import('common')
global_import('gradproc')
_global_import('sessinit')
_global_import('common')
_global_import('gradproc')
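The effect is to re-export each submodule's public names at package level, so callers can write, e.g. (the exact package path is an assumption): ::

    from tensorpack.tfutils import SaverRestore, dump_session_params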
......@@ -8,8 +8,11 @@ import tensorflow as tf
def get_default_sess_config(mem_fraction=0.5):
"""
Return a better config to use as default.
Tensorflow default session config consume too much resources
Return a better session config to use as default.
TensorFlow's default session config consumes too many resources.
:param mem_fraction: fraction of memory to use.
:returns: a `tf.ConfigProto` object.
"""
conf = tf.ConfigProto()
conf.gpu_options.per_process_gpu_memory_fraction = mem_fraction
......@@ -18,7 +21,7 @@ def get_default_sess_config(mem_fraction=0.5):
return conf
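For example: ::

    sess = tf.Session(config=get_default_sess_config(0.3))  # use ~30% of GPU memory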
def get_global_step_var():
""" get global_step variable in the current graph"""
""" :returns: the global_step variable in the current graph. create if not existed"""
try:
return tf.get_default_graph().get_tensor_by_name(GLOBAL_STEP_VAR_NAME)
except KeyError:
......@@ -27,13 +30,19 @@ def get_global_step_var():
return var
def get_global_step():
""" get global_step value with current graph and session"""
""" :returns: global_step value in current graph and session"""
return tf.train.global_step(
tf.get_default_session(),
get_global_step_var())
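The two functions combine as follows (a sketch; assumes a default session is active): ::

    step_var = get_global_step_var()   # fetched, or created on first use
    step = get_global_step()           # its current value in the default session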
def get_op_var_name(name):
"""
Variable name is assumed to be ``op_name + ':0'``
:param name: an op or a variable name
:returns: (op_name, variable_name)
"""
if name.endswith(':0'):
return name[:-2], name
else:
......
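For example: ::

    get_op_var_name('conv0/W')     # -> ('conv0/W', 'conv0/W:0')
    get_op_var_name('conv0/W:0')   # -> ('conv0/W', 'conv0/W:0')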
......@@ -14,18 +14,24 @@ __all__ = ['GradientProcessor', 'SummaryGradient', 'CheckGradient',
class GradientProcessor(object):
__metaclass__ = ABCMeta
@abstractmethod
def process(self, grads):
"""
Process the symbolic gradients, return symbolic gradients
grads: list of (grad, var)
Process the symbolic gradients.
:param grads: list of (grad, var)
:returns: symbolic gradients with the same type as input
"""
return self._process(grads)
@abstractmethod
def _process(self, grads):
pass
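Subclasses only override `_process`; a minimal sketch of a hypothetical processor that clips every gradient to [-1, 1]: ::

    class ClipGradient(GradientProcessor):
        """ A hypothetical processor: clip each gradient to [-1, 1] """
        def _process(self, grads):
            return [(tf.clip_by_value(grad, -1.0, 1.0), var)
                    for grad, var in grads]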
class SummaryGradient(GradientProcessor):
"""
Summarize the histogram and RMS for each gradient variable
"""
def process(self, grads):
def _process(self, grads):
for grad, var in grads:
tf.histogram_summary(var.op.name + '/grad', grad)
tf.scalar_summary(var.op.name + '/gradRMS',
......@@ -37,7 +43,7 @@ class CheckGradient(GradientProcessor):
"""
Check gradients for numeric issues
"""
def process(self, grads):
def _process(self, grads):
for grad, var in grads:
assert grad is not None, "Grad is None for variable {}".format(var.name)
# TODO make assert work
......@@ -50,11 +56,11 @@ class ScaleGradient(GradientProcessor):
"""
def __init__(self, multipliers):
"""
multipliers: list of (regex, float)
:param multipliers: list of (regex, float)
"""
self.multipliers = multipliers
def process(self, grads):
def _process(self, grads):
# TODO use None for zero to speed up?
ret = []
for grad, var in grads:
......
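For example, to use a 10x learning rate on fully-connected weights and a 0.1x rate on the first conv layer (hypothetical variable regexes): ::

    ScaleGradient([('fc.*/W', 10.0), ('conv0.*', 0.1)])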
......@@ -7,7 +7,7 @@ import tensorflow as tf
from ..utils import logger
def describe_model():
""" describe the current model parameters"""
""" print a description of the current model parameters """
train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
msg = [""]
total = 0
......@@ -22,7 +22,10 @@ def describe_model():
def get_shape_str(tensors):
""" return the shape string for a tensor or a list of tensors"""
"""
:param tensors: a tensor or a list of tensors
:returns: a string to describe the shape
"""
if isinstance(tensors, (list, tuple)):
for v in tensors:
assert isinstance(v, (tf.Tensor, tf.Variable)), "Not a tensor: {}".format(type(v))
......
......@@ -14,18 +14,36 @@ __all__ = ['SessionInit', 'NewSession', 'SaverRestore', 'ParamRestore',
'dump_session_params']
class SessionInit(object):
""" Base class for utilities to initialize a session"""
__metaclass__ = ABCMeta
@abstractmethod
def init(self, sess):
""" Method to initialize a session"""
""" Initialize a session
:param sess: a `tf.Session`
"""
self._init(sess)
@abstractmethod
def _init(self, sess):
pass
class NewSession(SessionInit):
def init(self, sess):
"""
Create a new session. All variables will be initialized by their
initializer.
"""
def _init(self, sess):
sess.run(tf.initialize_all_variables())
class SaverRestore(SessionInit):
"""
Restore an old model saved by `tf.train.Saver`.
"""
def __init__(self, model_path):
"""
:param model_path: a model file or a ``checkpoint`` file.
"""
assert os.path.isfile(model_path)
if os.path.basename(model_path) == 'checkpoint':
model_path = tf.train.get_checkpoint_state(
......@@ -33,7 +51,7 @@ class SaverRestore(SessionInit):
assert os.path.isfile(model_path)
self.set_path(model_path)
def init(self, sess):
def _init(self, sess):
saver = tf.train.Saver()
saver.restore(sess, self.path)
logger.info(
......@@ -44,12 +62,15 @@ class SaverRestore(SessionInit):
class ParamRestore(SessionInit):
"""
Restore trainable variables from a dictionary
Restore trainable variables from a dictionary.
"""
def __init__(self, param_dict):
"""
:param param_dict: a dict of {name: value}
"""
self.prms = param_dict
def init(self, sess):
def _init(self, sess):
sess.run(tf.initialize_all_variables())
variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
var_dict = dict([v.name, v] for v in variables)
......@@ -70,7 +91,9 @@ class ParamRestore(SessionInit):
sess.run(var.assign(value))
def dump_session_params(path):
""" dump value of all trainable variables to a dict"""
""" Dump value of all trainable variables to a dict and save to `path` as
npy format.
"""
var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
result = {}
for v in var:
......
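The three initializers above might be used like this (a sketch; the file paths are hypothetical): ::

    NewSession()                           # initialize all variables from scratch
    SaverRestore('train_log/model-10000')  # a model file, or a `checkpoint` file
    # restore from a dict saved by dump_session_params (assuming np.save wrote it):
    ParamRestore(np.load('params.npy').item())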
......@@ -10,9 +10,7 @@ from . import get_global_step_var
def create_summary(name, v):
"""
Return a tf.Summary object with name and simple value v
Args: v: a value
Return a tf.Summary object with name and simple scalar value v
"""
assert isinstance(name, six.string_types), type(name)
v = float(v)
......@@ -22,8 +20,8 @@ def create_summary(name, v):
def add_activation_summary(x, name=None):
"""
Summary for an activation tensor x.
If name is None, use x.name
Add summary to graph for an activation tensor x.
If name is None, use x.name.
"""
ndim = x.get_shape().ndims
assert ndim >= 2, \
......@@ -35,9 +33,10 @@ def add_activation_summary(x, name=None):
def add_param_summary(summary_lists):
"""
summary_lists: list of (regex, [list of action to perform])
action can be 'mean', 'scalar', 'histogram', 'sparsity'
Add summaries for all trainable variables matching the regex.
:param summary_lists: list of (regex, [list of actions to perform]).
Actions can be 'mean', 'scalar', 'histogram', 'sparsity'.
"""
def perform(var, action):
ndim = var.get_shape().ndims
......@@ -67,10 +66,12 @@ def add_param_summary(summary_lists):
for act in actions:
perform(p, act)
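For example (hypothetical variable regexes): ::

    add_param_summary([('conv.*/W', ['histogram']),
                       ('fc.*/W', ['mean', 'histogram'])])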
# TODO use name of cost_var
def summary_moving_average(cost_var):
""" Create a MovingAverage op and summary for all variables in
MOVING_SUMMARY_VARS_KEY, as well as the argument
Return a op to maintain these average
MOVING_SUMMARY_VARS_KEY, as well as `cost_var`.
:returns: an op to maintain these averages.
"""
global_step_var = get_global_step_var()
averager = tf.train.ExponentialMovingAverage(
......
......@@ -6,6 +6,11 @@ import tensorflow as tf
import numpy as np
def one_hot(y, num_labels):
"""
:param y: prediction. an int vector of length N.
:param num_labels: an int. the number of output classes.
:returns: an NxC one-hot matrix.
"""
with tf.op_scope([y, num_labels], 'one_hot'):
batch_size = tf.size(y)
y = tf.expand_dims(y, 1)
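For example: ::

    y = tf.constant([1, 0])   # N = 2
    one_hot(y, 3)             # [[0., 1., 0.], [1., 0., 0.]]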
......@@ -18,9 +23,9 @@ def one_hot(y, num_labels):
def prediction_incorrect(logits, label):
"""
logits: batchxN
label: batch
return a binary vector with 1 means incorrect prediction
:param logits: NxC
:param label: N
:returns: a binary vector of length N with 1 meaning incorrect prediction
"""
with tf.op_scope([logits, label], 'incorrect'):
wrong = tf.not_equal(
......@@ -30,13 +35,24 @@ def prediction_incorrect(logits, label):
return wrong
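For example: ::

    logits = tf.constant([[2., 1.], [0., 3.]])
    label = tf.constant([0, 0])
    prediction_incorrect(logits, label)   # [0, 1]: only the second prediction is wrong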
def flatten(x):
"""
Flatten the tensor.
"""
return tf.reshape(x, [-1])
def batch_flatten(x):
"""
Flatten the tensor except the first dimension.
"""
total_dim = np.prod(x.get_shape()[1:].as_list())
return tf.reshape(x, [-1, total_dim])
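For example, a conv activation of shape [None, 8, 8, 32] flattens to [None, 2048]: ::

    x = tf.placeholder(tf.float32, [None, 8, 8, 32])
    batch_flatten(x)   # shape [None, 2048]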
def logSoftmax(x):
"""
Batch log softmax.
:param x: NxC tensor.
:returns: NxC tensor.
"""
with tf.op_scope([x], 'logSoftmax'):
z = x - tf.reduce_max(x, 1, keep_dims=True)
logprob = z - tf.log(tf.reduce_sum(tf.exp(z), 1, keep_dims=True))
......
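Subtracting the per-row max before exponentiating keeps the computation numerically stable; for example: ::

    x = tf.constant([[1000., 0.]])
    logSoftmax(x)   # ~[[0., -1000.]]; a naive log(softmax(x)) would overflow in exp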
......@@ -17,11 +17,14 @@ from ..tfutils.modelutils import describe_model
__all__ = ['Trainer']
class Trainer(object):
"""
Base class for a trainer.
"""
__metaclass__ = ABCMeta
def __init__(self, config):
"""
Config: a `TrainConfig` instance
:param config: a `TrainConfig` instance
"""
assert isinstance(config, TrainConfig), type(config)
self.config = config
......@@ -29,10 +32,12 @@ class Trainer(object):
@abstractmethod
def train(self):
""" Start training"""
pass
@abstractmethod
def run_step(self):
""" run an iteration"""
pass
def trigger_epoch(self):
......
......@@ -18,21 +18,20 @@ class TrainConfig(object):
"""
def __init__(self, **kwargs):
"""
Args:
dataset: the dataset to train. a tensorpack.dataflow.DataFlow instance.
optimizer: a tf.train.Optimizer instance defining the optimizer for trainig.
callbacks: a tensorpack.utils.callback.Callbacks instance. Define
the callbacks to perform during training. has to contain a
:param dataset: the dataset to train. a `DataFlow` instance.
:param optimizer: a `tf.train.Optimizer` instance defining the optimizer for training.
:param callbacks: a `callback.Callbacks` instance. Defines
the callbacks to perform during training. It has to contain a
SummaryWriter and a PeriodicSaver.
session_config: a tf.ConfigProto instance to instantiate the
:param session_config: a `tf.ConfigProto` instance to instantiate the
session. Defaults to a session running 1 GPU.
session_init: a tensorpack.utils.sessinit.SessionInit instance to
:param session_init: a `sessinit.SessionInit` instance to
initialize variables of a session. Defaults to a new session.
model: a ModelDesc instance
starting_epoch: int. default to be 1.
step_per_epoch: the number of steps (SGD updates) to perform in each epoch.
max_epoch: maximum number of epoch to run training. default to 100
nr_tower: int. number of towers. default to 1.
:param model: a `ModelDesc` instance.
:param starting_epoch: int. Defaults to 1.
:param step_per_epoch: the number of steps (SGD updates) to perform in each epoch.
:param max_epoch: maximum number of epochs to run training. Defaults to 100.
:param nr_tower: int. number of towers. Defaults to 1.
"""
def assert_type(v, tp):
assert isinstance(v, tp), v.__class__
......
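A minimal sketch of assembling such a config (`MyModel`, the dataflow, and the callback constructors are hypothetical): ::

    config = TrainConfig(
        dataset=BatchData(my_dataflow, 128),   # my_dataflow: a DataFlow instance
        optimizer=tf.train.AdamOptimizer(1e-3),
        callbacks=Callbacks([SummaryWriter(), PeriodicSaver()]),  # constructor args elided
        model=MyModel(),
        step_per_epoch=100,
        max_epoch=100)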
......@@ -10,10 +10,10 @@ Common utils.
These utils should be independent of TensorFlow.
"""
def global_import(name):
def _global_import(name):
p = __import__(name, globals(), None, level=1)
lst = p.__all__ if '__all__' in dir(p) else dir(p)
for k in lst:
globals()[k] = p.__dict__[k]
global_import('naming')
global_import('utils')
_global_import('naming')
_global_import('utils')