Commit 4f3d4e27 authored by Yuxin Wu

misc fixes..

parent 1188b56d
@@ -38,7 +38,6 @@ Models I trained can be downloaded [here](https://drive.google.com/drive/folders
 To view the loss curve:
 ```bash
 cat train_log/hed/stat.json | jq '.[] |
-[.xentropy1,.xentropy2,.xentropy3,.xentropy4,.xentropy5,.xentropy6] |
-map(tostring) | join("\t") | .' -r | \
+"\(.xentropy1)\t\(.xentropy2)\t\(.xentropy3)\t\(.xentropy4)\t\(.xentropy5)\t\(.xentropy6)"' -r | \
 ../../scripts/plot-point.py --legend 1,2,3,4,5,final --decay 0.8
 ```
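The new filter uses jq string interpolation instead of `map(tostring) | join("\t")` to emit the same tab-separated columns. For reference, a minimal Python sketch that produces the same TSV from `stat.json`; it assumes, as the jq filter implies, that the file is a JSON array of per-epoch dicts with keys `xentropy1`..`xentropy6`:

```python
# Hypothetical helper, not part of the repo: dump the six xentropy
# columns of train_log/hed/stat.json as TSV, mirroring the jq filter.
import json

with open('train_log/hed/stat.json') as f:
    stats = json.load(f)  # assumed: a JSON array of per-epoch dicts

keys = ['xentropy{}'.format(i) for i in range(1, 7)]
for entry in stats:
    # one tab-separated row per epoch, like the jq string interpolation
    print('\t'.join(str(entry[k]) for k in keys))
```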
@@ -18,8 +18,8 @@ from tensorpack.tfutils.symbolic_functions import *
 from tensorpack.tfutils.summary import *
 """
-Training code of Pre-Activation version of ResNet on ImageNet. Work In Progress.
-Top1 error is now about 0.5% higher than fb.resnet.torch.
+Training code of Pre-Activation version of ResNet on ImageNet.
+Mainly follows the setup in fb.resnet.torch.
 """
@@ -207,8 +207,9 @@ class StatMonitorParamSetter(HyperParamSetter):
         ):
         """
         Change param by `new_value = value_func(old_value)`,
-        if `stat_name` wasn't decreasing > threshold times in the latest
-        last_k times of statistics update.
+        if:
+            min(stats) >= stats[0] - threshold, where
+            stats = [`stat_name` in the latest `last_k` epochs]
         For example, if error wasn't decreasing, anneal the learning rate:
             StatMonitorParamSetter('learning_rate', 'val-error', lambda x: x * 0.2)
@@ -235,11 +236,11 @@ class StatMonitorParamSetter(HyperParamSetter):
         hist_first = hist[0]
         if not self.reverse:
             hist_min = min(hist)
-            if hist_min <= hist_first - self.threshold:  # small enough
+            if hist_min < hist_first - self.threshold:  # small enough
                 return None
         else:
             hist_max = max(hist)
-            if hist_max >= hist_first + self.threshold:  # large enough
+            if hist_max > hist_first + self.threshold:  # large enough
                 return None
         self.last_changed_epoch = self.epoch_num
         return self.value_func(self.get_current_value())
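Read together with the docstring above: the parameter changes only when the best value inside the `last_k`-epoch window failed to improve on the window's first value by more than `threshold`; after this commit an improvement of exactly `threshold` still counts as progress. A standalone sketch of the (non-reverse) condition, with made-up numbers:

```python
# Standalone illustration of the trigger condition; the numbers are
# invented for the example and are not from the repo.
threshold = 0.01
hist = [0.30, 0.295, 0.31, 0.305]  # e.g. val-error over the last_k epochs

# Strict comparison, as in the new code: min(hist) must drop strictly
# below hist[0] - threshold to count as "still improving".
if not (min(hist) < hist[0] - threshold):
    print("plateau detected -> change the hyperparameter")
```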
@@ -4,7 +4,7 @@
 import sys, os
 import cv2
-import multiprocessing
+import multiprocessing as mp
 from ..utils.concurrency import DIE
 from ..utils.fs import mkdir_p
@@ -44,8 +44,8 @@ def dataflow_to_process_queue(ds, size, nr_consumer):
     :returns: (queue, process). The process will take data from `ds` to fill
         the queue once you start it. Each element is (task_id, dp).
     """
-    q = multiprocessing.Queue(size)
-    class EnqueProc(multiprocessing.Process):
+    q = mp.Queue(size)
+    class EnqueProc(mp.Process):
         def __init__(self, ds, q, nr_consumer):
             super(EnqueProc, self).__init__()
             self.ds = ds
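A hedged usage sketch of the helper above, not from the repo: it assumes the producer enqueues a `DIE` sentinel per consumer once `ds` is exhausted (consistent with the `DIE` import), and `handle` is a hypothetical per-datapoint callback:

```python
# `ds` is any tensorpack DataFlow; size and nr_consumer are illustrative.
q, proc = dataflow_to_process_queue(ds, size=100, nr_consumer=1)
proc.start()
while True:
    task_id, dp = q.get()   # each element is (task_id, dp)
    if task_id == DIE:      # assumed end-of-stream sentinel, one per consumer
        break
    handle(dp)              # hypothetical callback
proc.join()
```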
@@ -101,7 +101,7 @@ class MultiProcessDatasetPredictor(DatasetPredictorBase):
         # setup all the procs
         self.inqueue_proc.start()
         for p, gpuid in zip(self.workers, gpus):
-            if gpuid == '':
+            if gpuid == '-1':
                 logger.info("Worker {} uses CPU".format(p.idx))
             else:
                 logger.info("Worker {} uses GPU {}".format(p.idx, gpuid))
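The CPU sentinel changes from `''` to `'-1'`, matching the convention that `CUDA_VISIBLE_DEVICES=-1` hides all GPUs from TensorFlow (an empty string is easy to confuse with an unset variable). A sketch of that convention only; the worker internals here are illustrative, not this class's code:

```python
import os

def set_worker_device(gpuid):
    # Illustrative: '-1' makes CUDA report no visible devices,
    # so the worker falls back to CPU; any other id pins one GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuid
```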
@@ -62,7 +62,7 @@ class SaverRestore(SessionInit):
         if os.path.basename(model_path) == 'checkpoint':
             model_path = tf.train.latest_checkpoint(os.path.dirname(model_path))
         # to be consistent with either v1 or v2
-        assert os.path.isfile(model_path) or os.path.isfile(model_path + '.index')
+        assert os.path.isfile(model_path) or os.path.isfile(model_path + '.index'), model_path
         self.set_path(model_path)
         self.prefix = prefix
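The assert accepts both TensorFlow checkpoint formats: a V1 checkpoint is a single file at `model_path`, while a V2 checkpoint stores `model_path.index` plus `model_path.data-*` shards, so `model_path` itself is not a file. Appending `, model_path` makes the AssertionError print the offending path. A minimal sketch of the same check, under standard TF naming assumptions (`check_checkpoint` is a hypothetical name, not the repo's):

```python
import os

def check_checkpoint(model_path):
    # V1: everything lives in `model_path` itself.
    # V2: only `model_path.index` (+ data shards) exists on disk.
    assert os.path.isfile(model_path) or \
        os.path.isfile(model_path + '.index'), model_path
    return model_path
```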