Commit 5102a8f3 authored by ppwwyyxx

type check, dataflow base

parent 63e3a42f
#!/usr/bin/env python2
# -*- coding: UTF-8 -*-
# File: base.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
from abc import ABCMeta, abstractmethod
__all__ = ['DataFlow']
class DataFlow(object):
    """Base class of all data flows.

    A DataFlow produces datapoints (tuples) through :meth:`get_data`
    and reports how many datapoints it holds through :meth:`size`.
    Concrete subclasses must implement both methods.
    """
    # Without an ABC metaclass the @abstractmethod decorators below are
    # inert: a bare DataFlow could be instantiated and its "abstract"
    # methods would silently return None. The py2-style __metaclass__
    # declaration (matching the python2 shebang) makes them enforced.
    __metaclass__ = ABCMeta

    @abstractmethod
    def get_data(self):
        """
        A generator to generate data as tuple.

        Yields:
            tuple: one datapoint.
        """

    @abstractmethod
    def size(self):
        """
        Size of this data flow.

        Returns:
            int: the number of datapoints this flow can produce.
        """
...@@ -4,10 +4,11 @@ ...@@ -4,10 +4,11 @@
# Author: Yuxin Wu <ppwwyyxx@gmail.com> # Author: Yuxin Wu <ppwwyyxx@gmail.com>
import numpy as np import numpy as np
from .base import DataFlow
__all__ = ['BatchData'] __all__ = ['BatchData']
class BatchData(object): class BatchData(DataFlow):
def __init__(self, ds, batch_size, remainder=False): def __init__(self, ds, batch_size, remainder=False):
""" """
Args: Args:
......
...@@ -8,9 +8,10 @@ import gzip ...@@ -8,9 +8,10 @@ import gzip
import numpy import numpy
from six.moves import urllib from six.moves import urllib
from utils import logger from utils import logger
from ..base import DataFlow
__all__ = ['Mnist'] __all__ = ['Mnist']
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
...@@ -47,6 +48,7 @@ def extract_images(filename): ...@@ -47,6 +48,7 @@ def extract_images(filename):
data = data.reshape(num_images, rows, cols, 1) data = data.reshape(num_images, rows, cols, 1)
return data return data
def extract_labels(filename): def extract_labels(filename):
"""Extract the labels into a 1D uint8 numpy array [index].""" """Extract the labels into a 1D uint8 numpy array [index]."""
with gzip.open(filename) as bytestream: with gzip.open(filename) as bytestream:
...@@ -91,50 +93,43 @@ class DataSet(object): ...@@ -91,50 +93,43 @@ class DataSet(object):
def num_examples(self): def num_examples(self):
return self._num_examples return self._num_examples
def read_data_sets(train_dir): class Mnist(DataFlow):
class DataSets(object): def __init__(self, train_or_test, dir=None):
pass """
data_sets = DataSets() Args:
train_or_test: string either 'train' or 'test'
"""
if dir is None:
dir = os.path.join(os.path.dirname(__file__), 'mnist_data')
assert train_or_test in ['train', 'test']
self.train_or_test = train_or_test
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
local_file = maybe_download(TRAIN_IMAGES, train_dir) local_file = maybe_download(TRAIN_IMAGES, dir)
train_images = extract_images(local_file) train_images = extract_images(local_file)
local_file = maybe_download(TRAIN_LABELS, train_dir) local_file = maybe_download(TRAIN_LABELS, dir)
train_labels = extract_labels(local_file) train_labels = extract_labels(local_file)
local_file = maybe_download(TEST_IMAGES, train_dir) local_file = maybe_download(TEST_IMAGES, dir)
test_images = extract_images(local_file) test_images = extract_images(local_file)
local_file = maybe_download(TEST_LABELS, train_dir) local_file = maybe_download(TEST_LABELS, dir)
test_labels = extract_labels(local_file) test_labels = extract_labels(local_file)
data_sets.train = DataSet(train_images, train_labels) self.train = DataSet(train_images, train_labels)
data_sets.test = DataSet(test_images, test_labels) self.test = DataSet(test_images, test_labels)
return data_sets
class Mnist(object):
def __init__(self, train_or_test, dir=None):
"""
Args:
train_or_test: string either 'train' or 'test'
"""
if dir is None:
dir = os.path.join(os.path.dirname(__file__), 'mnist_data')
self.dataset = read_data_sets(dir)
self.train_or_test = train_or_test
def size(self): def size(self):
ds = self.dataset.train if self.train_or_test == 'train' else self.dataset.test ds = self.train if self.train_or_test == 'train' else self.test
return ds.num_examples return ds.num_examples
def get_data(self): def get_data(self):
ds = self.dataset.train if self.train_or_test == 'train' else self.dataset.test ds = self.train if self.train_or_test == 'train' else self.test
for k in xrange(ds.num_examples): for k in xrange(ds.num_examples):
img = ds.images[k].reshape((28, 28)) img = ds.images[k].reshape((28, 28))
label = ds.labels[k] label = ds.labels[k]
...@@ -144,4 +139,5 @@ if __name__ == '__main__': ...@@ -144,4 +139,5 @@ if __name__ == '__main__':
ds = Mnist('train') ds = Mnist('train')
for (img, label) in ds.get_data(): for (img, label) in ds.get_data():
from IPython import embed; embed() from IPython import embed; embed()
break
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
import tensorflow as tf import tensorflow as tf
from utils import * from utils import *
from dataflow import DataFlow
from itertools import count from itertools import count
def prepare(): def prepare():
...@@ -20,17 +21,19 @@ def start_train(config): ...@@ -20,17 +21,19 @@ def start_train(config):
Args: Args:
config: a tensorpack config dictionary config: a tensorpack config dictionary
""" """
# a Dataflow instance
dataset_train = config['dataset_train'] dataset_train = config['dataset_train']
assert isinstance(dataset_train, DataFlow), dataset_train.__class__
# a tf.train.Optimizer instance # a tf.train.Optimizer instance
optimizer = config['optimizer'] optimizer = config['optimizer']
assert isinstance(optimizer, tf.train.Optimizer), optimizer.__class__
# a list of Callback instance # a list of Callback instance
callbacks = Callbacks(config.get('callbacks', [])) callbacks = Callbacks(config.get('callbacks', []))
# a tf.ConfigProto instance # a tf.ConfigProto instance
sess_config = config.get('session_config', None) sess_config = config.get('session_config', None)
assert isinstance(sess_config, tf.ConfigProto), sess_config.__class__
# a list of input/output variables # a list of input/output variables
input_vars = config['inputs'] input_vars = config['inputs']
......
...@@ -89,6 +89,9 @@ class SummaryWriter(Callback): ...@@ -89,6 +89,9 @@ class SummaryWriter(Callback):
class Callbacks(Callback): class Callbacks(Callback):
def __init__(self, callbacks): def __init__(self, callbacks):
for cb in callbacks:
assert isinstance(cb, Callback), cb.__class__
# put SummaryWriter to the first # put SummaryWriter to the first
for idx, cb in enumerate(callbacks): for idx, cb in enumerate(callbacks):
if type(cb) == SummaryWriter: if type(cb) == SummaryWriter:
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
import logging import logging
import os import os
import os.path
from termcolor import colored from termcolor import colored
__all__ = [] __all__ = []
...@@ -38,14 +39,14 @@ for func in ['info', 'warning', 'error', 'critical', 'warn']: ...@@ -38,14 +39,14 @@ for func in ['info', 'warning', 'error', 'critical', 'warn']:
def set_file(path): def set_file(path):
if os.path.isfile(path): if os.path.isfile(path):
warn("File \"{}\" exists! backup? (y/n)".format(path))
resp = raw_input()
if resp in ['y', 'Y']:
from datetime import datetime from datetime import datetime
backup_name = path + datetime.now().strftime('.%d-%H%M%S') backup_name = path + datetime.now().strftime('.%d-%H%M%S')
import shutil import shutil
shutil.move(path, backup_name) shutil.move(path, backup_name)
info("Log '{}' moved to '{}'".format(path, backup_name)) info("Log file '{}' backuped to '{}'".format(path, backup_name))
dirname = os.path.dirname(path)
if not os.path.isdir(dirname):
os.makedirs(dirname)
hdl = logging.FileHandler( hdl = logging.FileHandler(
filename=path, encoding='utf-8', mode='w') filename=path, encoding='utf-8', mode='w')
logger.addHandler(hdl) logger.addHandler(hdl)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment