Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
7d40e049
You need to sign in or sign up before continuing.
Commit
7d40e049
authored
Oct 10, 2017
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Write InfoGAN with tf.distributions and deprecate tfutils.distributions (fix #348)
parent
560bc84e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
66 additions
and
39 deletions
+66
-39
examples/GAN/InfoGAN-mnist.py
examples/GAN/InfoGAN-mnist.py
+64
-39
tensorpack/tfutils/distributions.py
tensorpack/tfutils/distributions.py
+2
-0
No files found.
examples/GAN/InfoGAN-mnist.py
View file @
7d40e049
...
@@ -11,12 +11,11 @@ import sys
...
@@ -11,12 +11,11 @@ import sys
import
argparse
import
argparse
from
tensorpack
import
*
from
tensorpack
import
*
from
tensorpack.utils.viz
import
*
from
tensorpack.utils
import
viz
from
tensorpack.tfutils.distributions
import
*
from
tensorpack.tfutils.scope_utils
import
auto_reuse_variable_scope
,
under_name_scope
from
tensorpack.tfutils.scope_utils
import
auto_reuse_variable_scope
from
tensorpack.tfutils
import
optimizer
,
summary
from
tensorpack.tfutils
import
optimizer
,
summary
import
tensorpack.tfutils.symbolic_functions
as
symbf
import
tensorpack.tfutils.symbolic_functions
as
symbf
from
tensorpack.tfutils.gradproc
import
ScaleGradient
,
CheckGradient
from
tensorpack.tfutils.gradproc
import
ScaleGradient
from
tensorpack.dataflow
import
dataset
from
tensorpack.dataflow
import
dataset
from
GAN
import
GANTrainer
,
GANModelDesc
from
GAN
import
GANTrainer
,
GANModelDesc
...
@@ -31,17 +30,51 @@ A pretrained model is at https://drive.google.com/open?id=0B9IPQTvr2BBkLUF2M0RXU
...
@@ -31,17 +30,51 @@ A pretrained model is at https://drive.google.com/open?id=0B9IPQTvr2BBkLUF2M0RXU
"""
"""
BATCH
=
128
BATCH
=
128
# latent space is cat(10) x uni(2) x noise(NOISE_DIM)
NUM_CLASS
=
10
NUM_UNIFORM
=
2
DIST_PARAM_DIM
=
NUM_CLASS
+
NUM_UNIFORM
NOISE_DIM
=
62
NOISE_DIM
=
62
# prior: the assumption about how the latent factors are presented in the dataset
DIST_PRIOR_PARAM
=
[
1.
]
*
NUM_CLASS
+
[
0.
]
*
NUM_UNIFORM
class
GaussianWithUniformSample
(
GaussianDistribution
):
def
get_distributions
(
vec_cat
,
vec_uniform
):
cat
=
tf
.
distributions
.
Categorical
(
logits
=
vec_cat
,
validate_args
=
True
,
name
=
'cat'
)
uni
=
tf
.
distributions
.
Normal
(
vec_uniform
,
scale
=
1.
,
validate_args
=
True
,
allow_nan_stats
=
False
,
name
=
'uni_a'
)
return
cat
,
uni
def
entropy_from_samples
(
samples
,
vec
):
"""
Estimate H(x|s) ~= -E_{x \sim P(x|s)}[\log Q(x|s)], where x are samples, and Q is parameterized by vec.
"""
samples_cat
=
tf
.
argmax
(
samples
[:,
:
NUM_CLASS
],
axis
=
1
,
output_type
=
tf
.
int32
)
samples_uniform
=
samples
[:,
NUM_CLASS
:]
cat
,
uniform
=
get_distributions
(
vec
[:,
:
NUM_CLASS
],
vec
[:,
NUM_CLASS
:])
def
neg_logprob
(
dist
,
sample
,
name
):
nll
=
-
dist
.
log_prob
(
sample
)
# average over batch
return
tf
.
reduce_sum
(
tf
.
reduce_mean
(
nll
,
axis
=
0
),
name
=
name
)
entropies
=
[
neg_logprob
(
cat
,
samples_cat
,
'nll_cat'
),
neg_logprob
(
uniform
,
samples_uniform
,
'nll_uniform'
)]
return
entropies
@
under_name_scope
()
def
sample_prior
(
batch_size
):
cat
,
_
=
get_distributions
(
DIST_PRIOR_PARAM
[:
NUM_CLASS
],
DIST_PRIOR_PARAM
[
NUM_CLASS
:])
sample_cat
=
tf
.
one_hot
(
cat
.
sample
(
batch_size
),
NUM_CLASS
)
"""
"""
OpenAI official code actually models the "uniform" latent code as
OpenAI official code actually models the "uniform" latent code as
a Gaussian distribution, but obtains the samples from a uniform distribution.
a Gaussian distribution, but obtains the samples from a uniform distribution.
We follow the official code for now.
"""
"""
def
_sample
(
self
,
batch_size
,
theta
):
sample_uni
=
tf
.
random_uniform
([
batch_size
,
NUM_UNIFORM
],
-
1
,
1
)
return
tf
.
random_uniform
([
batch_size
,
self
.
dim
],
-
1
,
1
)
samples
=
tf
.
concat
([
sample_cat
,
sample_uni
],
axis
=
1
)
return
samples
class
Model
(
GANModelDesc
):
class
Model
(
GANModelDesc
):
...
@@ -73,24 +106,15 @@ class Model(GANModelDesc):
...
@@ -73,24 +106,15 @@ class Model(GANModelDesc):
encoder
=
(
LinearWrap
(
l
)
encoder
=
(
LinearWrap
(
l
)
.
FullyConnected
(
'fce1'
,
128
,
nl
=
tf
.
identity
)
.
FullyConnected
(
'fce1'
,
128
,
nl
=
tf
.
identity
)
.
BatchNorm
(
'bne'
)
.
LeakyReLU
()
.
BatchNorm
(
'bne'
)
.
LeakyReLU
()
.
FullyConnected
(
'fce-out'
,
self
.
factors
.
param_dim
,
nl
=
tf
.
identity
)())
.
FullyConnected
(
'fce-out'
,
DIST_PARAM_DIM
,
nl
=
tf
.
identity
)())
return
logits
,
encoder
return
logits
,
encoder
def
_build_graph
(
self
,
inputs
):
def
_build_graph
(
self
,
inputs
):
real_sample
=
inputs
[
0
]
real_sample
=
inputs
[
0
]
real_sample
=
tf
.
expand_dims
(
real_sample
,
-
1
)
real_sample
=
tf
.
expand_dims
(
real_sample
,
-
1
)
# latent space is cat(10) x uni(1) x uni(1) x noise(NOISE_DIM)
self
.
factors
=
ProductDistribution
(
"factors"
,
[
CategoricalDistribution
(
"cat"
,
10
),
GaussianWithUniformSample
(
"uni_a"
,
1
),
GaussianWithUniformSample
(
"uni_b"
,
1
)])
# prior: the assumption how the factors are presented in the dataset
prior
=
tf
.
constant
([
0.1
]
*
10
+
[
0
,
0
],
tf
.
float32
,
[
12
],
name
=
'prior'
)
batch_prior
=
tf
.
tile
(
tf
.
expand_dims
(
prior
,
0
),
[
BATCH
,
1
],
name
=
'batch_prior'
)
# sample the latent code:
# sample the latent code:
zc
=
symbf
.
shapeless_placeholder
(
zc
=
symbf
.
shapeless_placeholder
(
sample_prior
(
BATCH
),
0
,
name
=
'z_code'
)
self
.
factors
.
sample
(
BATCH
,
prior
),
0
,
name
=
'z_code'
)
z_noise
=
symbf
.
shapeless_placeholder
(
z_noise
=
symbf
.
shapeless_placeholder
(
tf
.
random_uniform
([
BATCH
,
NOISE_DIM
],
-
1
,
1
),
0
,
name
=
'z_noise'
)
tf
.
random_uniform
([
BATCH
,
NOISE_DIM
],
-
1
,
1
),
0
,
name
=
'z_noise'
)
z
=
tf
.
concat
([
zc
,
z_noise
],
1
,
name
=
'z'
)
z
=
tf
.
concat
([
zc
,
z_noise
],
1
,
name
=
'z'
)
...
@@ -115,28 +139,29 @@ class Model(GANModelDesc):
...
@@ -115,28 +139,29 @@ class Model(GANModelDesc):
= H(x) + E[\log P(x|s)]
= H(x) + E[\log P(x|s)]
The distribution from which zc is sampled, in this case, is set to a fixed prior already.
The distribution from which zc is sampled, in this case, is set to a fixed prior already.
So the first term is a constant.
For the second term, we can maximize its variational lower bound:
For the second term, we can maximize its variational lower bound:
E_{x \sim P(x|s)}[\log Q(x|s)]
E_{x \sim P(x|s)}[\log Q(x|s)]
where Q(x|s) is a proposal distribution to approximate P(x|s).
where Q(x|s) is a proposal distribution to approximate P(x|s).
Here, Q(x|s) is assumed to be a distribution which shares the form
Here, Q(x|s) is assumed to be a distribution which shares the form
of
self.factors
, and whose parameters are predicted by the discriminator network.
of
P
, and whose parameters are predicted by the discriminator network.
"""
"""
with
tf
.
name_scope
(
"mutual_information"
):
with
tf
.
name_scope
(
"mutual_information"
):
ents
=
self
.
factors
.
entropy
(
zc
,
batch_prior
)
batch_prior
=
tf
.
tile
(
tf
.
expand_dims
(
DIST_PRIOR_PARAM
,
0
),
[
BATCH
,
1
],
name
=
'batch_prior'
)
with
tf
.
name_scope
(
'prior_entropy'
):
cat
,
uni
=
get_distributions
(
DIST_PRIOR_PARAM
[:
NUM_CLASS
],
DIST_PRIOR_PARAM
[
NUM_CLASS
:])
ents
=
[
cat
.
entropy
(
name
=
'cat_entropy'
),
tf
.
reduce_sum
(
uni
.
entropy
(),
name
=
'uni_entropy'
)]
entropy
=
tf
.
add_n
(
ents
,
name
=
'total_entropy'
)
entropy
=
tf
.
add_n
(
ents
,
name
=
'total_entropy'
)
# Note that dropping this term has no effect because the entropy
# Note that the entropy of prior is a constant. The paper mentioned it but didn't use it.
# of prior is a constant. The paper mentioned it but didn't use it.
# Adding this term may make the curve less stable because the
# entropy estimated from the samples is not the true value.
# post-process output vector from discriminator to obtain valid distribution parameters
with
tf
.
name_scope
(
'conditional_entropy'
):
encoder_activation
=
self
.
factors
.
encoder_activation
(
dist_param
)
cond_ents
=
entropy_from_samples
(
zc
,
dist_param
)
cond_ents
=
self
.
factors
.
entropy
(
zc
,
encoder_activation
)
cond_entropy
=
tf
.
add_n
(
cond_ents
,
name
=
"total_entropy"
)
cond_entropy
=
tf
.
add_n
(
cond_ents
,
name
=
"total_conditional_entropy"
)
MI
=
tf
.
subtract
(
entropy
,
cond_entropy
,
name
=
'mutual_information'
)
MI
=
tf
.
subtract
(
entropy
,
cond_entropy
,
name
=
'mutual_information'
)
summary
.
add_moving_summary
(
entropy
,
cond_entropy
,
MI
,
*
ents
)
summary
.
add_moving_summary
(
entropy
,
cond_entropy
,
MI
,
*
cond_
ents
)
# default GAN objective
# default GAN objective
self
.
build_losses
(
real_pred
,
fake_pred
)
self
.
build_losses
(
real_pred
,
fake_pred
)
...
@@ -151,7 +176,7 @@ class Model(GANModelDesc):
...
@@ -151,7 +176,7 @@ class Model(GANModelDesc):
self
.
collect_variables
()
self
.
collect_variables
()
def
_get_optimizer
(
self
):
def
_get_optimizer
(
self
):
lr
=
symbf
.
get_scalar_var
(
'learning_rate'
,
2e-4
,
summary
=
Tru
e
)
lr
=
tf
.
get_variable
(
'learning_rate'
,
initializer
=
2e-4
,
dtype
=
tf
.
float32
,
trainable
=
Fals
e
)
opt
=
tf
.
train
.
AdamOptimizer
(
lr
,
beta1
=
0.5
,
epsilon
=
1e-6
)
opt
=
tf
.
train
.
AdamOptimizer
(
lr
,
beta1
=
0.5
,
epsilon
=
1e-6
)
# generator learns 5 times faster
# generator learns 5 times faster
return
optimizer
.
apply_grad_processors
(
return
optimizer
.
apply_grad_processors
(
...
@@ -165,7 +190,7 @@ def get_data():
...
@@ -165,7 +190,7 @@ def get_data():
def
get_config
():
def
get_config
():
logger
.
auto_set_dir
()
logger
.
auto_set_dir
(
'd'
)
return
TrainConfig
(
return
TrainConfig
(
dataflow
=
get_data
(),
dataflow
=
get_data
(),
callbacks
=
[
ModelSaver
(
keep_freq
=
0.1
)],
callbacks
=
[
ModelSaver
(
keep_freq
=
0.1
)],
...
@@ -195,26 +220,26 @@ def sample(model_path):
...
@@ -195,26 +220,26 @@ def sample(model_path):
z_noise
=
np
.
random
.
uniform
(
-
1
,
1
,
(
100
,
NOISE_DIM
))
z_noise
=
np
.
random
.
uniform
(
-
1
,
1
,
(
100
,
NOISE_DIM
))
zc
=
np
.
concatenate
((
z_cat
,
z_uni
*
0
,
z_uni
*
0
),
axis
=
1
)
zc
=
np
.
concatenate
((
z_cat
,
z_uni
*
0
,
z_uni
*
0
),
axis
=
1
)
o
=
pred
(
zc
,
z_noise
)[
0
]
o
=
pred
(
zc
,
z_noise
)[
0
]
viz1
=
stack_patches
(
o
,
nr_row
=
10
,
nr_col
=
10
)
viz1
=
viz
.
stack_patches
(
o
,
nr_row
=
10
,
nr_col
=
10
)
viz1
=
cv2
.
resize
(
viz1
,
(
IMG_SIZE
,
IMG_SIZE
))
viz1
=
cv2
.
resize
(
viz1
,
(
IMG_SIZE
,
IMG_SIZE
))
# show effect of first continuous variable with fixed noise
# show effect of first continuous variable with fixed noise
zc
=
np
.
concatenate
((
z_cat
,
z_uni
,
z_uni
*
0
),
axis
=
1
)
zc
=
np
.
concatenate
((
z_cat
,
z_uni
,
z_uni
*
0
),
axis
=
1
)
o
=
pred
(
zc
,
z_noise
*
0
)[
0
]
o
=
pred
(
zc
,
z_noise
*
0
)[
0
]
viz2
=
stack_patches
(
o
,
nr_row
=
10
,
nr_col
=
10
)
viz2
=
viz
.
stack_patches
(
o
,
nr_row
=
10
,
nr_col
=
10
)
viz2
=
cv2
.
resize
(
viz2
,
(
IMG_SIZE
,
IMG_SIZE
))
viz2
=
cv2
.
resize
(
viz2
,
(
IMG_SIZE
,
IMG_SIZE
))
# show effect of second continuous variable with fixed noise
# show effect of second continuous variable with fixed noise
zc
=
np
.
concatenate
((
z_cat
,
z_uni
*
0
,
z_uni
),
axis
=
1
)
zc
=
np
.
concatenate
((
z_cat
,
z_uni
*
0
,
z_uni
),
axis
=
1
)
o
=
pred
(
zc
,
z_noise
*
0
)[
0
]
o
=
pred
(
zc
,
z_noise
*
0
)[
0
]
viz3
=
stack_patches
(
o
,
nr_row
=
10
,
nr_col
=
10
)
viz3
=
viz
.
stack_patches
(
o
,
nr_row
=
10
,
nr_col
=
10
)
viz3
=
cv2
.
resize
(
viz3
,
(
IMG_SIZE
,
IMG_SIZE
))
viz3
=
cv2
.
resize
(
viz3
,
(
IMG_SIZE
,
IMG_SIZE
))
viz
=
stack_patches
(
canvas
=
viz
.
stack_patches
(
[
viz1
,
viz2
,
viz3
],
[
viz1
,
viz2
,
viz3
],
nr_row
=
1
,
nr_col
=
3
,
border
=
5
,
bgcolor
=
(
255
,
0
,
0
))
nr_row
=
1
,
nr_col
=
3
,
border
=
5
,
bgcolor
=
(
255
,
0
,
0
))
interactive_imshow
(
viz
)
viz
.
interactive_imshow
(
canvas
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
tensorpack/tfutils/distributions.py
View file @
7d40e049
...
@@ -2,6 +2,7 @@ import tensorflow as tf
...
@@ -2,6 +2,7 @@ import tensorflow as tf
from
functools
import
wraps
from
functools
import
wraps
import
numpy
as
np
import
numpy
as
np
from
..utils.develop
import
log_deprecated
from
.common
import
get_tf_version_number
from
.common
import
get_tf_version_number
__all__
=
[
'Distribution'
,
__all__
=
[
'Distribution'
,
...
@@ -59,6 +60,7 @@ class Distribution(object):
...
@@ -59,6 +60,7 @@ class Distribution(object):
distribution.
distribution.
"""
"""
self
.
name
=
name
self
.
name
=
name
log_deprecated
(
"tfutils.distributions"
,
"Please use tf.distributions instead!"
,
"2017-12-10"
)
@
class_scope
@
class_scope
def
loglikelihood
(
self
,
x
,
theta
):
def
loglikelihood
(
self
,
x
,
theta
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment