add flownet2 inference examples (#853)

* add flownet2-S inference example * fix readme * flake8 fix * fix flownet-s, flownet-c scaling * uuu * Small simplifications * Add centercrop in inference. Update readme

add flownet2 inference examples (#853)
* add flownet2-S inference example * fix readme * flake8 fix * fix flownet-s, flownet-c scaling * uuu * Small simplifications * Add centercrop in inference. Update readme
0d36de5f · Patrick Wieschollek · Yuxin Wu · 01245d68 · 0d36de5f · 0d36de5f
Commit 0d36de5f authored Aug 24, 2018 by Patrick Wieschollek Committed by Yuxin Wu Aug 24, 2018
6 changed files
--- a/examples/OpticalFlow/README.md
+++ b/examples/OpticalFlow/README.md
+## OpticalFlow - FlowNet2
+
+Load and run the pre-trained model in
+[FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925)
+by Ilg et al.
+
+Given two images, the network is trained to predict the optical flow between these images.
+
+<p align="center"> <img src="./preview.jpg" width="100%"> </p>
+
+* Top: both input images from Flying Chairs, ground-truth, original FlowNet2 results (Caffe)
+* Bottom: Converted FlowNet2-C, FlowNet2-S, FlowNet2 results (this implementation)
+
+| Model     | AEE (sintel clean) |
+| ------    | ------             |
+| FlowNet-S | 3.82               |
+| FlowNet-C | 3.08               |
+| FlowNet2  | 2.10               |
+
+The authors report the AEE of *2.03* (Caffe Model) on Sintel-clean and our implementation gives an AEE of *2.10*,
+which is better than other TensorFlow implementations.
+
+
+### Usage
+
+1. Download the pre-trained model:
+
+```bash
+wget http://models.tensorpack.com/OpticalFlow/flownet2.npz
+wget http://models.tensorpack.com/OpticalFlow/flownet2-s.npz
+wget http://models.tensorpack.com/OpticalFlow/flownet2-c.npz
+```
+
+*Note:* You are required to accept the [author's license](https://github.com/lmb-freiburg/flownet2#license-and-citation) to use these weights.
+
+2. Run inference
+
+```bash
+python flownet2.py \
+			--left left.png --right right.png \
+			--load flownet2.npz --model flownet2
+```
+
+3. Evaluate AEE (Average Endpoint Error) on Sintel dataset:
+
+```
+wget http://files.is.tue.mpg.de/sintel/MPI-Sintel-complete.zip
+unzip MPI-Sintel-complete.zip
+python flownet2.py --load flownet2.npz --model flownet2 --sintel_path /path/to/Sintel/training
+```
--- a/examples/OpticalFlow/flownet2.py
+++ b/examples/OpticalFlow/flownet2.py
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Author: Patrick Wieschollek <mail@patwie.com>
+
+import os
+import cv2
+import glob
+from helper import Flow
+import argparse
+
+from tensorpack import *
+from tensorpack.utils import viz
+
+import flownet_models as models
+
+
+def apply(model, model_path, left, right, ground_truth=None):
+    left = cv2.imread(left)
+    right = cv2.imread(right)
+
+    h, w = left.shape[:2]
+    newh = (h // 64) * 64
+    neww = (w // 64) * 64
+    aug = imgaug.CenterCrop((newh, neww))
+    left, right = aug.augment(left), aug.augment(right)
+
+    predict_func = OfflinePredictor(PredictConfig(
+        model=model(height=newh, width=neww),
+        session_init=get_model_loader(model_path),
+        input_names=['left', 'right'],
+        output_names=['prediction']))
+
+    left_input, right_input = [x.astype('float32').transpose(2, 0, 1)[None, ...]
+                               for x in [left, right]]
+    output = predict_func(left_input, right_input)[0].transpose(0, 2, 3, 1)
+    flow = Flow()
+
+    img = flow.visualize(output[0])
+    patches = [left, right, img * 255.]
+    if ground_truth is not None:
+        patches.append(flow.visualize(Flow.read(ground_truth)) * 255.)
+    img = viz.stack_patches(patches, 2, 2)
+
+    cv2.imshow('flow output', img)
+    cv2.imwrite('flow_prediction.png', img)
+    cv2.waitKey(0)
+
+
+class SintelData(DataFlow):
+
+    def __init__(self, data_path):
+        super(SintelData, self).__init__()
+        self.data_path = data_path
+        self.path_prefix = os.path.join(data_path, 'flow')
+        assert os.path.isdir(self.path_prefix), self.path_prefix
+        self.flows = glob.glob(os.path.join(self.path_prefix, '*', '*.flo'))
+
+    def size(self):
+        return len(self.flows)
+
+    def get_data(self):
+        for flow_path in self.flows:
+            input_path = flow_path.replace(
+                self.path_prefix, os.path.join(self.data_path, 'clean'))
+            frame_id = int(input_path[-8:-4])
+            input_a_path = '%s%04i.png' % (input_path[:-8], frame_id)
+            input_b_path = '%s%04i.png' % (input_path[:-8], frame_id + 1)
+
+            input_a = cv2.imread(input_a_path)
+            input_b = cv2.imread(input_b_path)
+            flow = Flow.read(flow_path)
+
+            # most implementation just crop the center
+            # which seems to be accepted practise
+            h, w = input_a.shape[:2]
+            newh = (h // 64) * 64
+            neww = (w // 64) * 64
+            aug = imgaug.CenterCrop((newh, neww))
+            input_a = aug.augment(input_a)
+            input_b = aug.augment(input_b)
+            flow = aug.augment(flow)
+            yield [input_a, input_b, flow]
+
+
+def inference(model, model_path, sintel_path):
+    ds = SintelData(sintel_path)
+
+    def nhwc2nchw(dp):
+        return [dp[0].transpose(2, 0, 1),
+                dp[1].transpose(2, 0, 1),
+                dp[2].transpose(2, 0, 1)]
+
+    ds = MapData(ds, nhwc2nchw)
+    ds = BatchData(ds, 1)
+    ds.reset_state()
+
+    # look at shape information (all images in Sintel has the same shape)
+    h, w = next(ds.get_data())[0].shape[2:]
+
+    pred = PredictConfig(
+        model=model(height=h, width=w),
+        session_init=get_model_loader(model_path),
+        input_names=['left', 'right', 'gt_flow'],
+        output_names=['epe', 'prediction'])
+    pred = SimpleDatasetPredictor(pred, ds)
+
+    avg_epe, count_epe = 0, 0
+
+    for o in pred.get_result():
+        avg_epe += o[0]
+        count_epe += 1
+
+    print('average endpoint error (AEE): %f' % (float(avg_epe) / float(count_epe)))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--load', help='path to the model', required=True)
+    parser.add_argument('--model', help='model',
+                        choices=['flownet2', 'flownet2-s', 'flownet2-c'], required=True)
+    parser.add_argument('--left', help='input')
+    parser.add_argument('--right', help='input')
+    parser.add_argument('--gt', help='path to ground truth flow')
+    parser.add_argument('--sintel_path', help='path to sintel dataset')
+    args = parser.parse_args()
+
+    model = {'flownet2-s': models.FlowNet2S,
+             'flownet2-c': models.FlowNet2C,
+             'flownet2': models.FlowNet2}[args.model]
+
+    if args.sintel_path:
+        inference(model, args.load, args.sintel_path)
+    else:
+        apply(model, args.load, args.left, args.right, args.gt)
--- a/examples/OpticalFlow/flownet_models.py
+++ b/examples/OpticalFlow/flownet_models.py
--- a/examples/OpticalFlow/helper.py
+++ b/examples/OpticalFlow/helper.py
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Helper for Optical Flow visualization
+"""
+
+import numpy as np
+
+
+class Flow(object):
+    """
+    based on https://github.com/cgtuebingen/learning-blind-motion-deblurring/blob/master/synthblur/src/flow.cpp#L44
+    """
+    def __init__(self):
+        super(Flow, self).__init__()
+        self.wheel = None
+        self._construct_wheel()
+
+    @staticmethod
+    def read(file):
+        # https://stackoverflow.com/a/44906777/7443104
+        with open(file, 'rb') as f:
+            magic = np.fromfile(f, np.float32, count=1)
+            if 202021.25 != magic:
+                raise Exception('Magic number incorrect. Invalid .flo file')
+            else:
+                w = np.fromfile(f, np.int32, count=1)[0]
+                h = np.fromfile(f, np.int32, count=1)[0]
+                data = np.fromfile(f, np.float32, count=2 * w * h)
+                return np.resize(data, (h, w, 2))
+
+    def _construct_wheel(self):
+        k = 0
+
+        RY, YG, GC = 15, 6, 4
+        YG, GC, CB = 6, 4, 11
+        BM, MR = 13, 6
+
+        self.wheel = np.zeros((55, 3), dtype=np.float32)
+
+        for i in range(RY):
+            self.wheel[k] = np.array([255., 255. * i / float(RY), 0])
+            k += 1
+
+        for i in range(YG):
+            self.wheel[k] = np.array([255. - 255. * i / float(YG), 255., 0])
+            k += 1
+
+        for i in range(GC):
+            self.wheel[k] = np.array([0, 255., 255. * i / float(GC)])
+            k += 1
+
+        for i in range(CB):
+            self.wheel[k] = np.array([0, 255. - 255. * i / float(CB), 255.])
+            k += 1
+
+        for i in range(BM):
+            self.wheel[k] = np.array([255. * i / float(BM), 0, 255.])
+            k += 1
+
+        for i in range(MR):
+            self.wheel[k] = np.array([255., 0, 255. - 255. * i / float(MR)])
+            k += 1
+
+        self.wheel = self.wheel / 255.
+
+    def visualize(self, nnf):
+        assert len(nnf.shape) == 3
+        assert nnf.shape[2] == 2
+
+        RY, YG, GC = 15, 6, 4
+        YG, GC, CB = 6, 4, 11
+        BM, MR = 13, 6
+        NCOLS = RY + YG + GC + CB + BM + MR
+
+        fx = nnf[:, :, 0].astype(np.float32)
+        fy = nnf[:, :, 1].astype(np.float32)
+
+        h, w = fx.shape[:2]
+        fx = fx.reshape([-1])
+        fy = fy.reshape([-1])
+
+        rad = np.sqrt(fx * fx + fy * fy)
+
+        max_rad = rad.max()
+
+        a = np.arctan2(-fy, -fx) / np.pi
+        fk = (a + 1.0) / 2.0 * (NCOLS - 1)
+        k0 = fk.astype(np.int32)
+        k1 = (k0 + 1) % NCOLS
+        f = (fk - k0).astype(np.float32)
+
+        color0 = self.wheel[k0, :]
+        color1 = self.wheel[k1, :]
+
+        f = np.stack([f, f, f], axis=-1)
+        color = (1 - f) * color0 + f * color1
+
+        color = 1 - (np.expand_dims(rad, axis=-1) / max_rad) * (1 - color)
+
+        return color.reshape(h, w, 3)[:, :, ::-1]
+
+
+if __name__ == '__main__':
+    import cv2
+    nnf = Flow.read('/tmp/data2/07446_flow.flo')
+    v = Flow()
+    rgb = v.visualize(nnf)
+    cv2.imshow('rgb', rgb)
+    cv2.waitKey(0)
--- a/examples/OpticalFlow/preview.jpg
+++ b/examples/OpticalFlow/preview.jpg
--- a/examples/README.md
+++ b/examples/README.md
@@ -36,6 +36,7 @@ These are all the toy examples in tensorpack. They are supposed to be just demos
 | Single-image super-resolution using [EnhanceNet](SuperResolution)                                                                                     |                    |
 | Learn steering filters with [Dynamic Filter Networks](DynamicFilterNetwork)                                                                           | visually reproduce |
 | Load a pre-trained [AlexNet, VGG, or Convolutional Pose Machines](CaffeModels)                                                                        |                    |
+| Load a pre-trained [FlowNet2-S, FlowNet2-C, FlowNet2](OpticalFlow) | |

 ## Reinforcement Learning:
 | Name                                                                                                     | Performance     |