Commit 0d36de5f authored by Patrick Wieschollek's avatar Patrick Wieschollek Committed by Yuxin Wu

add flownet2 inference examples (#853)

* add flownet2-S inference example

* fix readme

* flake8 fix

* fix flownet-s, flownet-c scaling

* uuu

* Small simplifications

* Add centercrop in inference. Update readme
parent 01245d68
## OpticalFlow - FlowNet2
Load and run the pre-trained model in
[FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925)
by Ilg et al.
Given two images, the network is trained to predict the optical flow between these images.
<p align="center"> <img src="./preview.jpg" width="100%"> </p>
* Top: both input images from Flying Chairs, ground-truth, original FlowNet2 results (Caffe)
* Bottom: Converted FlowNet2-C, FlowNet2-S, FlowNet2 results (this implementation)
| Model | AEE (sintel clean) |
| ------ | ------ |
| FlowNet-S | 3.82 |
| FlowNet-C | 3.08 |
| FlowNet2 | 2.10 |
The authors report the AEE of *2.03* (Caffe Model) on Sintel-clean and our implementation gives an AEE of *2.10*,
which is better than other TensorFlow implementations.
### Usage
1. Download the pre-trained models:
```bash
wget http://models.tensorpack.com/OpticalFlow/flownet2.npz
wget http://models.tensorpack.com/OpticalFlow/flownet2-s.npz
wget http://models.tensorpack.com/OpticalFlow/flownet2-c.npz
```
*Note:* You are required to accept the [author's license](https://github.com/lmb-freiburg/flownet2#license-and-citation) to use these weights.
2. Run inference
```bash
python flownet2.py \
--left left.png --right right.png \
--load flownet2.npz --model flownet2
```
3. Evaluate AEE (Average Endpoint Error) on Sintel dataset:
```
wget http://files.is.tue.mpg.de/sintel/MPI-Sintel-complete.zip
unzip MPI-Sintel-complete.zip
python flownet2.py --load flownet2.npz --model flownet2 --sintel_path /path/to/Sintel/training
```
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Patrick Wieschollek <mail@patwie.com>
import os
import cv2
import glob
from helper import Flow
import argparse
from tensorpack import *
from tensorpack.utils import viz
import flownet_models as models
def apply(model, model_path, left, right, ground_truth=None):
    """Predict the optical flow for one image pair and show/save the result.

    Both images are center-cropped to the largest size whose height and width
    are multiples of 64 before being fed to the network (presumably a
    requirement of the network architecture -- confirm against the model
    definition). The stacked visualization is displayed in an OpenCV window
    and written to ``flow_prediction.png``.

    Args:
        model: model class, instantiated as ``model(height=..., width=...)``.
        model_path: path to the weights, passed to ``get_model_loader``.
        left: path to the first input image.
        right: path to the second input image.
        ground_truth: optional path to a ``.flo`` file that is visualized
            next to the prediction.

    Raises:
        IOError: if one of the input images cannot be read.
        ValueError: if the images are smaller than 64 pixels in any dimension.
    """
    left_path, right_path = left, right
    left = cv2.imread(left_path)
    right = cv2.imread(right_path)
    # cv2.imread signals failure by returning None instead of raising.
    for path, image in ((left_path, left), (right_path, right)):
        if image is None:
            raise IOError('cannot read image: %s' % path)

    h, w = left.shape[:2]
    newh = (h // 64) * 64
    neww = (w // 64) * 64
    if newh == 0 or neww == 0:
        raise ValueError(
            'input images must be at least 64x64 pixels, got %ix%i' % (w, h))

    aug = imgaug.CenterCrop((newh, neww))
    left, right = aug.augment(left), aug.augment(right)

    predict_func = OfflinePredictor(PredictConfig(
        model=model(height=newh, width=neww),
        session_init=get_model_loader(model_path),
        input_names=['left', 'right'],
        output_names=['prediction']))

    # HWC uint8 image -> float32 array of shape (1, C, H, W)
    left_input, right_input = [x.astype('float32').transpose(2, 0, 1)[None, ...]
                               for x in [left, right]]
    # bring the prediction back to channels-last for visualization
    output = predict_func(left_input, right_input)[0].transpose(0, 2, 3, 1)
    flow = Flow()

    img = flow.visualize(output[0])
    patches = [left, right, img * 255.]
    if ground_truth is not None:
        # Crop the ground truth exactly like the inputs so that it lines up
        # with the cropped images and the prediction in the stacked output.
        gt_flow = aug.augment(Flow.read(ground_truth))
        patches.append(flow.visualize(gt_flow) * 255.)
    img = viz.stack_patches(patches, 2, 2)

    cv2.imshow('flow output', img)
    cv2.imwrite('flow_prediction.png', img)
    cv2.waitKey(0)
class SintelData(DataFlow):
    """DataFlow over the flow files found below ``<data_path>/flow``.

    Every datapoint is ``[frame_a, frame_b, flow]``: the two consecutive
    frames loaded from ``<data_path>/clean`` and the flow read from the
    ``.flo`` file, all center-cropped to multiples of 64.
    """

    def __init__(self, data_path):
        super(SintelData, self).__init__()
        self.data_path = data_path
        self.path_prefix = os.path.join(data_path, 'flow')
        assert os.path.isdir(self.path_prefix), self.path_prefix
        # one sub-directory level below 'flow', each holding the .flo files
        pattern = os.path.join(self.path_prefix, '*', '*.flo')
        self.flows = glob.glob(pattern)

    def size(self):
        """Return the number of flow files found."""
        return len(self.flows)

    def get_data(self):
        """Yield ``[frame_a, frame_b, flow]`` for every flow file."""
        clean_prefix = os.path.join(self.data_path, 'clean')
        for flow_path in self.flows:
            # The frames share the flow file's relative path below 'clean';
            # the four characters before the extension are the frame number.
            input_path = flow_path.replace(self.path_prefix, clean_prefix)
            stem = input_path[:-8]
            frame_id = int(input_path[-8:-4])
            frame_paths = ['%s%04i.png' % (stem, frame_id + offset)
                           for offset in (0, 1)]

            input_a, input_b = [cv2.imread(p) for p in frame_paths]
            flow = Flow.read(flow_path)

            # Most implementations simply crop the center,
            # which seems to be accepted practice.
            h, w = input_a.shape[:2]
            crop_shape = ((h // 64) * 64, (w // 64) * 64)
            aug = imgaug.CenterCrop(crop_shape)

            yield [aug.augment(x) for x in (input_a, input_b, flow)]
def inference(model, model_path, sintel_path):
    """Run ``model`` over the Sintel data and print the mean 'epe' output.

    Args:
        model: model class, instantiated as ``model(height=..., width=...)``.
        model_path: path to the weights, passed to ``get_model_loader``.
        sintel_path: directory handed to ``SintelData``.
    """
    def to_channels_first(dp):
        # move the last axis of each of the three components to the front
        return [dp[idx].transpose(2, 0, 1) for idx in range(3)]

    ds = SintelData(sintel_path)
    ds = BatchData(MapData(ds, to_channels_first), 1)
    ds.reset_state()

    # Peek at the first batch to learn the spatial size
    # (all images in Sintel have the same shape).
    first_batch = next(ds.get_data())
    h, w = first_batch[0].shape[2:]

    config = PredictConfig(
        model=model(height=h, width=w),
        session_init=get_model_loader(model_path),
        input_names=['left', 'right', 'gt_flow'],
        output_names=['epe', 'prediction'])
    predictor = SimpleDatasetPredictor(config, ds)

    epe_sum = 0
    num_samples = 0
    for outputs in predictor.get_result():
        epe_sum += outputs[0]
        num_samples += 1

    mean_epe = float(epe_sum) / float(num_samples)
    print('average endpoint error (AEE): %f' % mean_epe)
if __name__ == '__main__':
    # Command-line entry point: either evaluate on Sintel (--sintel_path)
    # or run the network on a single image pair (--left / --right).
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', help='path to the model', required=True)
    parser.add_argument('--model', help='model',
                        choices=['flownet2', 'flownet2-s', 'flownet2-c'], required=True)
    parser.add_argument('--left', help='input')
    parser.add_argument('--right', help='input')
    parser.add_argument('--gt', help='path to ground truth flow')
    parser.add_argument('--sintel_path', help='path to sintel dataset')
    args = parser.parse_args()

    # Without a dataset path we need an explicit image pair; fail early with
    # a usage message instead of passing None on to cv2.imread.
    if not args.sintel_path and not (args.left and args.right):
        parser.error('--left and --right are required unless --sintel_path is given')

    # map the command-line model name to its implementation
    model = {'flownet2-s': models.FlowNet2S,
             'flownet2-c': models.FlowNet2C,
             'flownet2': models.FlowNet2}[args.model]

    if args.sintel_path:
        inference(model, args.load, args.sintel_path)
    else:
        apply(model, args.load, args.left, args.right, args.gt)
This diff is collapsed.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Helper for Optical Flow visualization
"""
import numpy as np
class Flow(object):
    """
    Read ``.flo`` optical-flow files and render flow fields as color images.

    based on https://github.com/cgtuebingen/learning-blind-motion-deblurring/blob/master/synthblur/src/flow.cpp#L44
    """

    # Value of the float32 that must open every .flo file.
    _FLO_MAGIC = 202021.25

    # Layout of the color wheel, one entry per segment:
    # (number of colors, channel held at 255, ramping channel, ramp rises?)
    # with channel indices R=0, G=1, B=2.
    _WHEEL_SEGMENTS = (
        (15, 0, 1, True),    # RY: red -> yellow
        (6, 1, 0, False),    # YG: yellow -> green
        (4, 1, 2, True),     # GC: green -> cyan
        (11, 2, 1, False),   # CB: cyan -> blue
        (13, 2, 0, True),    # BM: blue -> magenta
        (6, 0, 2, False),    # MR: magenta -> red
    )

    def __init__(self):
        super(Flow, self).__init__()
        self.wheel = None
        self._construct_wheel()

    @staticmethod
    def read(file):
        """Load a ``.flo`` file.

        The file consists of a float32 magic number, the int32 width and
        height, and ``2 * width * height`` float32 values.

        Args:
            file: path of the ``.flo`` file.

        Returns:
            float32 array of shape ``(height, width, 2)``.

        Raises:
            Exception: if the magic number is wrong or the file is shorter
                than its header announces.
        """
        # https://stackoverflow.com/a/44906777/7443104
        with open(file, 'rb') as f:
            magic = np.fromfile(f, np.float32, count=1)
            # an empty file yields an empty array, so check the size first
            if magic.size != 1 or magic[0] != Flow._FLO_MAGIC:
                raise Exception('Magic number incorrect. Invalid .flo file')
            dims = np.fromfile(f, np.int32, count=2)
            if dims.size != 2 or dims[0] < 0 or dims[1] < 0:
                raise Exception('Header incomplete. Invalid .flo file')
            # plain ints avoid int32 overflow when computing the value count
            w, h = int(dims[0]), int(dims[1])
            data = np.fromfile(f, np.float32, count=2 * w * h)
        # np.resize would silently repeat or zero-fill short data, so verify
        # the length and reshape strictly instead.
        if data.size != 2 * w * h:
            raise Exception('Flow data truncated. Invalid .flo file')
        return data.reshape(h, w, 2)

    def _construct_wheel(self):
        """Fill ``self.wheel`` with the RGB color wheel, scaled to [0, 1]."""
        ncols = sum(length for length, _, _, _ in self._WHEEL_SEGMENTS)
        wheel = np.zeros((ncols, 3), dtype=np.float32)

        k = 0
        for length, full, ramp, rising in self._WHEEL_SEGMENTS:
            for i in range(length):
                level = 255. * i / float(length)
                wheel[k, full] = 255.
                wheel[k, ramp] = level if rising else 255. - level
                k += 1

        self.wheel = wheel / 255.

    def visualize(self, nnf):
        """Render a flow field as a color image.

        The direction of each flow vector selects a color from the wheel
        (linearly interpolated between neighboring entries); the magnitude,
        relative to the largest magnitude in the field, blends that color
        with white, so zero flow is rendered white.

        Args:
            nnf: array of shape ``(h, w, 2)``.

        Returns:
            float32 array of shape ``(h, w, 3)`` with values in [0, 1]; the
            channel order is the reverse of the wheel's RGB order.
        """
        assert len(nnf.shape) == 3
        assert nnf.shape[2] == 2

        ncols = self.wheel.shape[0]

        fx = nnf[:, :, 0].astype(np.float32)
        fy = nnf[:, :, 1].astype(np.float32)

        h, w = fx.shape[:2]
        fx = fx.reshape([-1])
        fy = fy.reshape([-1])

        rad = np.sqrt(fx * fx + fy * fy)
        max_rad = rad.max()
        # An all-zero field has max_rad == 0; dividing by it would turn the
        # whole image into NaN. Any positive divisor gives white here.
        if not max_rad > 0:
            max_rad = 1.

        # angle mapped to [-1, 1], then to a fractional wheel index
        a = np.arctan2(-fy, -fx) / np.pi
        fk = (a + 1.0) / 2.0 * (ncols - 1)
        k0 = fk.astype(np.int32)
        k1 = (k0 + 1) % ncols
        f = (fk - k0).astype(np.float32)

        color0 = self.wheel[k0, :]
        color1 = self.wheel[k1, :]

        f = np.stack([f, f, f], axis=-1)
        color = (1 - f) * color0 + f * color1

        color = 1 - (np.expand_dims(rad, axis=-1) / max_rad) * (1 - color)

        return color.reshape(h, w, 3)[:, :, ::-1]
if __name__ == '__main__':
    # Manual smoke test: load one flow file and display its visualization.
    import cv2

    flow_field = Flow.read('/tmp/data2/07446_flow.flo')
    rendered = Flow().visualize(flow_field)
    cv2.imshow('rgb', rendered)
    cv2.waitKey(0)
...@@ -36,6 +36,7 @@ These are all the toy examples in tensorpack. They are supposed to be just demos ...@@ -36,6 +36,7 @@ These are all the toy examples in tensorpack. They are supposed to be just demos
| Single-image super-resolution using [EnhanceNet](SuperResolution) | | | Single-image super-resolution using [EnhanceNet](SuperResolution) | |
| Learn steering filters with [Dynamic Filter Networks](DynamicFilterNetwork) | visually reproduce | | Learn steering filters with [Dynamic Filter Networks](DynamicFilterNetwork) | visually reproduce |
| Load a pre-trained [AlexNet, VGG, or Convolutional Pose Machines](CaffeModels) | | | Load a pre-trained [AlexNet, VGG, or Convolutional Pose Machines](CaffeModels) | |
| Load a pre-trained [FlowNet2-S, FlowNet2-C, FlowNet2](OpticalFlow) | |
## Reinforcement Learning: ## Reinforcement Learning:
| Name | Performance | | Name | Performance |
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment