Commit c346e924 authored by Yuxin Wu's avatar Yuxin Wu

fix #786

parent 56a77747
@@ -80,8 +80,8 @@ You can overwrite any of the following methods to define a new callback:
The training loops would become `sess.run([training_op, my_op])`.
This is different from `sess.run(training_op); sess.run(my_op);`,
-which is what you would get if you run `my_op` in `_trigger_step`.
-Sometimes the difference matters, please choose carefully.
+which is what you would get if you write `self.trainer.sess.run(my_op)` in `_trigger_step`.
+Usually the difference matters; choose carefully.
* `_trigger_step(self)`
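The difference between the two call patterns can be pictured with a plain-Python toy model (not real TensorFlow, an illustration only): each `run()` call triggers one execution of the stateful graph, so fetching both ops in a single call observes the same step, while two separate calls observe two different steps.

```python
# Toy model of session semantics (illustration, not TensorFlow):
# one run() call = one execution of the shared, stateful graph.
class ToySession:
    def __init__(self):
        self.step = 0  # shared mutable state, like a tf.Variable

    def run(self, ops):
        self.step += 1  # one graph execution per run() call
        results = [op(self.step) for op in ops]
        return results if len(ops) > 1 else results[0]

training_op = lambda step: "train@%d" % step
my_op = lambda step: "extra@%d" % step

sess = ToySession()
# One call: both ops see the same execution (step 1).
both = sess.run([training_op, my_op])

sess2 = ToySession()
# Two calls: my_op sees a later, separate execution (step 2).
first = sess2.run([training_op])
second = sess2.run([my_op])
```

With real TensorFlow the same principle applies to stateful ops such as variable updates, which is why fusing `my_op` into the training loop is not equivalent to running it afterwards.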
@@ -105,6 +105,7 @@ You can overwrite any of the following methods to define a new callback:
* Access tensors / ops (details mentioned above):
* For existing tensors/ops created in the tower, access them through [self.trainer.towers](../../modules/train.html#tensorpack.train.TowerTrainer.towers).
* Extra tensors/ops have to be created in `_setup_graph` callback method.
+* Access the current graph and session by `self.trainer.graph` and `self.trainer.sess`.
* Write stuff to the monitor backend, by `self.trainer.monitors.put_xxx`.
The monitors might direct your events to TensorFlow events file, JSON file, stdout, etc.
You can access history monitor data as well. See the docs for [Monitors](../../modules/callbacks.html#tensorpack.callbacks.Monitors)
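The monitor backend's fan-out can be sketched in a few lines of plain Python. This is a hypothetical minimal re-implementation of the dispatch idea, not tensorpack's actual `Monitors` class: one `put_scalar` call is forwarded to every registered backend (JSON file, stdout, etc.).

```python
import io
import json

class JSONBackend:
    """Toy backend: appends one JSON line per event to a buffer."""
    def __init__(self):
        self.buf = io.StringIO()

    def put_scalar(self, name, value):
        self.buf.write(json.dumps({name: value}) + "\n")

class StdoutBackend:
    """Toy backend: prints events to stdout."""
    def put_scalar(self, name, value):
        print("%s: %s" % (name, value))

class Monitors:
    """Dispatches each event to all registered backends."""
    def __init__(self, backends):
        self.backends = backends

    def put_scalar(self, name, value):
        for b in self.backends:
            b.put_scalar(name, value)

json_backend = JSONBackend()
monitors = Monitors([json_backend, StdoutBackend()])
monitors.put_scalar("accuracy", 0.9)
```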
@@ -57,7 +57,7 @@ class Model(ModelDesc):
def get_basic_cell():
cell = rnn.BasicLSTMCell(num_units=HIDDEN_SIZE, forget_bias=0.0, reuse=tf.get_variable_scope().reuse)
if is_training:
-            cell = rnn.DropoutWrapper(cell, output_keep_prob=DROPOUT)
+            cell = rnn.DropoutWrapper(cell, output_keep_prob=1 - DROPOUT)
return cell
cell = rnn.MultiRNNCell([get_basic_cell() for _ in range(NUM_LAYER)])
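The fix above hinges on a convention mismatch: `DROPOUT` here is the probability of *dropping* a unit, while `DropoutWrapper`'s `output_keep_prob` is the probability of *keeping* one, so the correct argument is `1 - DROPOUT`. A minimal inverted-dropout sketch in plain Python (illustration only, not TensorFlow's implementation) makes the two conventions concrete:

```python
import random

def dropout(xs, keep_prob, rng):
    """Keep each unit with probability keep_prob; rescale survivors."""
    if keep_prob == 1.0:   # no-op, as at inference time
        return list(xs)
    return [x / keep_prob if rng.random() < keep_prob else 0.0
            for x in xs]

DROPOUT = 0.35             # drop probability, the convention used above
keep_prob = 1 - DROPOUT    # what DropoutWrapper's output_keep_prob expects
out = dropout([1.0, 2.0, 3.0], keep_prob, random.Random(0))
```

Passing `DROPOUT` directly as `output_keep_prob` would silently *keep* only 35% of activations instead of dropping 35%, which is exactly the bug this commit fixes.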
@@ -73,7 +73,7 @@ class Model(ModelDesc):
embeddingW = tf.get_variable('embedding', [VOCAB_SIZE, HIDDEN_SIZE], initializer=initializer)
input_feature = tf.nn.embedding_lookup(embeddingW, input) # B x seqlen x hiddensize
-        input_feature = Dropout(input_feature, rate=DROPOUT)
+        input_feature = Dropout(input_feature, keep_prob=1 - DROPOUT)
with tf.variable_scope('LSTM', initializer=initializer):
input_list = tf.unstack(input_feature, num=SEQ_LEN, axis=1) # seqlen x (Bxhidden)
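`tf.nn.embedding_lookup` gathers rows of the embedding matrix by token id, turning a `B x seqlen` batch of ids into `B x seqlen x HIDDEN_SIZE` features. A plain-Python sketch of that gather for one sequence (illustration only, with toy sizes, not TensorFlow):

```python
# Toy embedding table: VOCAB_SIZE rows of HIDDEN_SIZE floats.
VOCAB_SIZE, HIDDEN_SIZE = 4, 3
embeddingW = [[float(i * HIDDEN_SIZE + j) for j in range(HIDDEN_SIZE)]
              for i in range(VOCAB_SIZE)]

token_ids = [2, 0, 2]  # one input sequence of length 3

# The lookup is just row indexing: seqlen x HIDDEN_SIZE output.
input_feature = [embeddingW[t] for t in token_ids]
```

Dropout is then applied to these looked-up feature vectors before they are unstacked along the sequence axis and fed to the LSTM.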