Commit fa988c58 authored by ppwwyyxx

predictor, init/restore session

parent ee0bca2d
......@@ -36,22 +36,22 @@ def get_model(inputs, is_training):
#[image, label], BATCH_SIZE, CAPACITY, MIN_AFTER_DEQUEUE,
#num_threads=2, enqueue_many=False)
conv0 = Conv2D('conv0', image, out_channel=64, kernel_shape=5, padding='SAME')
pool0 = MaxPooling('pool0', conv0, 3, stride=2, padding='SAME')
norm0 = tf.nn.lrn(pool0, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm0')
l = Conv2D('conv0', image, out_channel=64, kernel_shape=5, padding='SAME')
l = MaxPooling('pool0', l, 3, stride=2, padding='SAME')
l = tf.nn.lrn(l, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm0')
conv1 = Conv2D('conv1', norm0, out_channel=64, kernel_shape=5, padding='SAME')
norm1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
pool1 = MaxPooling('pool1', norm1, 3, stride=2, padding='SAME')
l = Conv2D('conv1', l, out_channel=64, kernel_shape=5, padding='SAME')
l = tf.nn.lrn(l, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
l = MaxPooling('pool1', l, 3, stride=2, padding='SAME')
fc0 = FullyConnected('fc0', pool1, 384)
fc1 = FullyConnected('fc1', fc0, out_dim=192)
l = FullyConnected('fc0', l, 384)
l = FullyConnected('fc1', l, out_dim=192)
# fc will have activation summary by default. disable this for the output layer
fc2 = FullyConnected('fc2', fc1, out_dim=10, summary_activation=False, nl=tf.identity)
prob = tf.nn.softmax(fc2, name='output')
logits = FullyConnected('fc2', l, out_dim=10, summary_activation=False, nl=tf.identity)
prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 10)
cost = tf.nn.softmax_cross_entropy_with_logits(fc2, y)
cost = tf.nn.softmax_cross_entropy_with_logits(logits, y)
cost = tf.reduce_mean(cost, name='cross_entropy_loss')
tf.add_to_collection(COST_VARS_KEY, cost)
......@@ -88,11 +88,8 @@ def get_config():
#step_per_epoch = 20
#dataset_test = FixedSizeData(dataset_test, 20)
sess_config = tf.ConfigProto()
sess_config.device_count['GPU'] = 1
sess_config = get_default_sess_config()
sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5
sess_config.gpu_options.allocator_type = 'BFC'
sess_config.allow_soft_placement = True
# prepare model
input_vars = [
......
......@@ -51,21 +51,21 @@ def get_model(inputs, is_training):
#[image, label], BATCH_SIZE, CAPACITY, MIN_AFTER_DEQUEUE,
#num_threads=2, enqueue_many=False)
conv0 = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
pool0 = MaxPooling('pool0', conv0, 2)
conv1 = Conv2D('conv1', pool0, out_channel=40, kernel_shape=3)
pool1 = MaxPooling('pool1', conv1, 2)
l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
l = MaxPooling('pool0', l, 2)
l = Conv2D('conv1', l, out_channel=40, kernel_shape=3)
l = MaxPooling('pool1', l, 2)
fc0 = FullyConnected('fc0', pool1, 1024)
fc0 = tf.nn.dropout(fc0, keep_prob)
l = FullyConnected('fc0', l, 1024)
l = tf.nn.dropout(l, keep_prob)
# fc will have activation summary by default. disable this for the output layer
fc1 = FullyConnected('fc1', fc0, out_dim=10,
logits = FullyConnected('fc1', l, out_dim=10,
summary_activation=False, nl=tf.identity)
prob = tf.nn.softmax(fc1, name='output')
prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 10)
cost = tf.nn.softmax_cross_entropy_with_logits(fc1, y)
cost = tf.nn.softmax_cross_entropy_with_logits(logits, y)
cost = tf.reduce_mean(cost, name='cross_entropy_loss')
tf.add_to_collection(COST_VARS_KEY, cost)
......@@ -101,11 +101,8 @@ def get_config():
#step_per_epoch = 20
#dataset_test = FixedSizeData(dataset_test, 20)
sess_config = tf.ConfigProto()
sess_config.device_count['GPU'] = 1
sess_config = get_default_sess_config()
sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5
sess_config.gpu_options.allocator_type = 'BFC'
sess_config.allow_soft_placement = True
# prepare model
input_vars = [
......@@ -116,6 +113,8 @@ def get_config():
]
input_queue = tf.RandomShuffleQueue(
100, 50, [x.dtype for x in input_vars], name='queue')
#input_queue = tf.FIFOQueue(
#100, [x.dtype for x in input_vars], name='queue')
global_step_var = tf.get_default_graph().get_tensor_by_name(GLOBAL_STEP_VAR_NAME)
lr = tf.train.exponential_decay(
......
......@@ -9,66 +9,53 @@ import argparse
import numpy as np
from utils import *
from utils.modelutils import describe_model, restore_params
from utils.modelutils import describe_model
from utils import logger
from dataflow import DataFlow
def start_infer(config):
def get_predict_func(config):
"""
Args:
config: a tensorpack config dictionary
Returns:
a function that takes a list of inputs to run the model
"""
dataset = config['dataset']
assert isinstance(dataset, DataFlow), dataset.__class__
# a tf.ConfigProto instance
sess_config = config.get('session_config', None)
if sess_config is None:
sess_config = get_default_sess_config()
assert isinstance(sess_config, tf.ConfigProto), sess_config.__class__
# TODO callback should have trigger_step and trigger_end?
callback = config['callback']
# restore saved params
params = config.get('restore_params', {})
sess_init = config['session_init']
# input/output variables
input_vars = config['inputs']
get_model_func = config['get_model_func']
output_vars, cost_var = get_model_func(input_vars, is_training=False)
# build graph
G = tf.get_default_graph()
G.add_to_collection(FORWARD_FUNC_KEY, get_model_func)
for v in input_vars:
G.add_to_collection(INPUT_VARS_KEY, v)
for v in output_vars:
G.add_to_collection(OUTPUT_VARS_KEY, v)
describe_model()
sess = tf.Session(config=sess_config)
sess.run(tf.initialize_all_variables())
sess_init.init(sess)
restore_params(sess, params)
with sess.as_default():
with timed_operation('running one batch'):
for dp in dataset.get_data():
def run_input(dp):
feed = dict(zip(input_vars, dp))
fetches = [cost_var] + output_vars
results = sess.run(fetches, feed_dict=feed)
results = sess.run(
[cost_var] + output_vars, feed_dict=feed)
cost = results[0]
outputs = results[1:]
prob = outputs[0]
callback(dp, outputs, cost)
def main(get_config_func):
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') # nargs='*' in multi mode
args = parser.parse_args()
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
with tf.Graph().as_default():
config = get_config_func()
start_infer(config)
return cost, outputs
return run_input
class DatasetPredictor(object):
    """Apply a predict function to every datapoint produced by a dataset."""

    def __init__(self, predict_config, dataset):
        """
        Args:
            predict_config: config handed to get_predict_func to build the
                per-datapoint predict function.
            dataset: a DataFlow instance supplying the datapoints.
        """
        assert isinstance(dataset, DataFlow)
        self.ds = dataset
        self.predict_func = get_predict_func(predict_config)

    def get_result(self):
        """Generator: yield the prediction of each datapoint, one at a time."""
        for datapoint in self.ds.get_data():
            prediction = self.predict_func(datapoint)
            yield prediction

    def get_all_result(self):
        """Run the predictor over the whole dataset and return a list."""
        return [prediction for prediction in self.get_result()]
......@@ -10,7 +10,8 @@ import argparse
from utils import *
from utils.concurrency import EnqueueThread,coordinator_guard
from utils.summary import summary_moving_average
from utils.modelutils import restore_params, describe_model
from utils.modelutils import describe_model
from utils.sessinit import NewSession
from utils import logger
from dataflow import DataFlow
......@@ -53,8 +54,7 @@ def start_train(config):
sess_config = config.get('session_config', None)
assert isinstance(sess_config, tf.ConfigProto), sess_config.__class__
# restore saved params
params = config.get('restore_params', {})
sess_init = config.get('session_init', NewSession())
# input/output variables
input_vars = config['inputs']
......@@ -83,9 +83,7 @@ def start_train(config):
train_op = get_train_op(optimizer, cost_var)
sess = tf.Session(config=sess_config)
sess.run(tf.initialize_all_variables())
restore_params(sess, params)
sess_init.init(sess)
# start training:
coord = tf.train.Coordinator()
......
......@@ -53,3 +53,16 @@ def create_test_session():
with create_test_graph():
with tf.Session() as sess:
yield sess
def get_default_sess_config():
    """
    Build the session config this project uses as its default.

    The stock TensorFlow session config consumes too many resources, so
    this one sets the GPU device count to 1, a per-process GPU memory
    fraction of 0.8, the 'BFC' allocator and soft device placement.

    Returns:
        a tf.ConfigProto instance
    """
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.device_count['GPU'] = 1

    gpu = config.gpu_options
    gpu.allocator_type = 'BFC'
    gpu.per_process_gpu_memory_fraction = 0.8
    return config
......@@ -59,12 +59,16 @@ class PeriodicCallback(Callback):
pass
class PeriodicSaver(PeriodicCallback):
def __init__(self, period=1):
def __init__(self, period=1, keep_recent=50, keep_freq=0.5):
super(PeriodicSaver, self).__init__(period)
self.path = os.path.join(logger.LOG_DIR, 'model')
self.keep_recent = keep_recent
self.keep_freq = keep_freq
def _before_train(self):
self.saver = tf.train.Saver(max_to_keep=99999)
self.saver = tf.train.Saver(
max_to_keep=self.keep_recent,
keep_checkpoint_every_n_hours=self.keep_freq)
def _trigger(self):
self.saver.save(tf.get_default_session(), self.path,
......@@ -142,6 +146,11 @@ class TrainCallbacks(Callback):
tm.log()
class TestCallbacks(Callback):
"""
Hold callbacks to be run in testing graph.
Will set a context with testing graph and testing session, for
each test-time callback to run
"""
def __init__(self, callbacks):
self.cbs = callbacks
......
......@@ -7,6 +7,7 @@ import logging
import os
import os.path
from termcolor import colored
from .utils import mkdir_p
__all__ = []
......@@ -56,5 +57,6 @@ LOG_DIR = "train_log"
def set_logger_dir(dirname):
    """
    Switch the logging directory to `dirname`.

    Updates the module-level LOG_DIR, creates the directory through
    mkdir_p, and directs the log file to 'training.log' inside it.

    Args:
        dirname: path of the directory to log into.
    """
    global LOG_DIR
    LOG_DIR = dirname
    mkdir_p(dirname)
    log_file = os.path.join(dirname, 'training.log')
    set_file(log_file)
......@@ -6,18 +6,6 @@
import tensorflow as tf
import logger
def restore_params(sess, params):
    """
    Assign saved values to the trainable variables of the current graph.

    Args:
        sess: the session in which the assign ops are run.
        params: dict mapping a variable name to the value to assign to it.
            Names that match no trainable variable are skipped with a
            warning.
    """
    variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    var_dict = {v.name: v for v in variables}
    # .items() rather than .iteritems(): works on both Python 2 and 3.
    for name, value in params.items():
        try:
            var = var_dict[name]
        except KeyError:
            # A dict lookup can only fail with KeyError.
            logger.warn("Param {} not found in this graph".format(name))
            continue
        logger.info("Restoring param {}".format(name))
        sess.run(var.assign(value))
def describe_model():
""" describe the current model parameters"""
train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
......
#!/usr/bin/env python2
# -*- coding: UTF-8 -*-
# File: sessinit.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
from abc import abstractmethod
import tensorflow as tf
from . import logger
class SessionInit(object):
    """Base interface for the different ways of initializing a session."""

    # NOTE: the class has no ABCMeta metaclass, so this decorator only marks
    # the method; it does not prevent instantiation.
    @abstractmethod
    def init(self, sess):
        """Initialize the given session.

        Args:
            sess: the session to initialize.
        """


class NewSession(SessionInit):
    """Initialize a session by running the initializer of all variables."""

    def init(self, sess):
        """Run tf.initialize_all_variables() in `sess`."""
        sess.run(tf.initialize_all_variables())


class SaverRestore(SessionInit):
    """Initialize a session by restoring a checkpoint with tf.train.Saver."""

    def __init__(self, model_path):
        """
        Args:
            model_path: path of the checkpoint to restore from.
        """
        self.set_path(model_path)

    def init(self, sess):
        """Restore the checkpoint at self.path into `sess` and log it."""
        saver = tf.train.Saver()
        saver.restore(sess, self.path)
        # self.path is the only checkpoint location this object knows about;
        # logging an undefined `ckpt` here raised NameError after restoring.
        logger.info("Restore checkpoint from {}".format(self.path))

    def set_path(self, model_path):
        """Change the checkpoint path used by the next call to init()."""
        self.path = model_path


class ParamRestore(SessionInit):
    """Initialize a session by assigning values from a name -> value dict."""

    def __init__(self, param_dict):
        """
        Args:
            param_dict: dict mapping a variable name to the value to assign.
        """
        self.prms = param_dict

    def init(self, sess):
        """Assign each known parameter to its trainable variable in `sess`.

        Names that match no trainable variable of the current graph are
        skipped with a warning.
        """
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        var_dict = {v.name: v for v in variables}
        # .items() rather than .iteritems(): works on both Python 2 and 3.
        for name, value in self.prms.items():
            try:
                var = var_dict[name]
            except KeyError:
                # A dict lookup can only fail with KeyError.
                logger.warn("Param {} not found in this graph".format(name))
                continue
            logger.info("Restoring param {}".format(name))
            sess.run(var.assign(value))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment