#include "vp9/common/vp9_motion_model.h"
#include "vp9/encoder/vp9_corner_detect.h"
#include "vp9/encoder/vp9_corner_match.h"
+#include "vp9/encoder/vp9_opticalflow.h"
#include "vp9/encoder/vp9_ransac.h"
#include "vp9/encoder/vp9_global_motion.h"
#include "vp9/encoder/vp9_motion_field.h"
#define MIN_INLIER_PROB 0.1
+// #define PARAM_SEARCH
+
#define MAX_CORNERS 4096
INLINE ransacType get_ransacType(TransformationType type) {
return sqrt(sqerr / n);
}
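+// Greedy coordinate search over a single model parameter: starting from the
+// current value, probe one step to either side, keep any step that lowers
+// the warp MSE between frm and ref, and shrink the step as the search
+// proceeds.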
+void refine_param(TransformationType type, unsigned char *frm,
+ unsigned char *ref, double *H,
+ int param_index, int width, int height,
+ int stride, int n_refinements) {
+ int i;
+ double step_mse;
+ double best_mse;
+ double curr_mse;
+ double curr_param = H[param_index];
+ double step = 0.5;
+ double best_param = curr_param;
+
+ curr_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
+ best_mse = curr_mse;
+ for (i = 0; i < n_refinements; i++) {
+ // look to the left
+ H[param_index] = curr_param - step;
+ step_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
+ if (step_mse < best_mse) {
+ step /= 2;
+ best_mse = step_mse;
+ best_param = H[param_index];
+ curr_param = best_param;
+ curr_mse = step_mse;
+ continue;
+ }
+
+ // look to the right
+ H[param_index] = curr_param + step;
+ step_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
+ if (step_mse < best_mse) {
+ step /= 2;
+ best_mse = step_mse;
+ best_param = H[param_index];
+ curr_param = best_param;
+ curr_mse = step_mse;
+ continue;
+ }
+
+    // No improvement found -> we are either already at a minimum or the
+    // step is too wide.
+ step /= 4;
+ }
+
+ H[param_index] = best_param;
+}
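+
+// Illustrative usage (a sketch, not part of this change): refine every
+// parameter of a model H in place, assuming frm and ref are
+// YV12_BUFFER_CONFIG pointers as in the callers below.
+//
+//   int p;
+//   for (p = 0; p < get_numparams(type); ++p)
+//     refine_param(type, frm->y_buffer, ref->y_buffer, H, p,
+//                  frm->y_width, frm->y_height, frm->y_stride, 3);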
+
static int compute_global_motion_single(TransformationType type,
- int *correspondences,
+ double *correspondences,
int num_correspondences,
double *H,
int *inlier_map) {
double *mp, *matched_points;
- int *cp = correspondences;
+ double *cp = correspondences;
int i, result;
int num_inliers = 0;
ransacType ransac = get_ransacType(type);
double *H) {
int num_frm_corners, num_ref_corners;
int num_correspondences;
- int *correspondences;
+ double *correspondences;
int num_inliers;
int *inlier_map = NULL;
int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
(void) cpi;
+
#ifdef USE_FAST_CORNER
num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width,
frm->y_height, frm->y_stride,
printf("Reference corners = %d\n", num_ref_corners);
#endif
- correspondences = (int *) malloc(num_frm_corners * 4 *
+ correspondences = (double *) malloc(num_frm_corners * 4 *
sizeof(*correspondences));
num_correspondences = determine_correspondence(frm->y_buffer,
}
static int compute_global_motion_multiple(TransformationType type,
- int *correspondences,
+ double *correspondences,
int num_correspondences,
double *H,
int max_models,
double inlier_prob,
int *num_models,
int *processed_mask) {
- int *cp = correspondences;
+ double *cp = correspondences;
double *mp, *matched_points;
int *best_inlier_mask;
int i, result;
return num_inliers_sum;
}
+int vp9_compute_global_motion_multiple_optical_flow(struct VP9_COMP *cpi,
+ TransformationType type,
+ YV12_BUFFER_CONFIG *frm,
+ YV12_BUFFER_CONFIG *ref,
+ int max_models,
+ double inlier_prob,
+ double *H) {
+ int num_correspondences = 0;
+ double *correspondence_pts = (double *)malloc(frm->y_width * frm->y_height *
+ sizeof(correspondence));
+ correspondence *correspondences = (correspondence *)correspondence_pts;
+ int num_inliers;
+ int i, j;
+ int num_models = 0;
+ int *inlier_map = NULL;
+  double *u = (double *)malloc(frm->y_width * frm->y_height * sizeof(*u));
+  double *v = (double *)malloc(frm->y_width * frm->y_height * sizeof(*v));
+  double *confidence = (double *)malloc(frm->y_width * frm->y_height *
+                                        sizeof(*confidence));
+
+ (void) cpi;
+ compute_flow(frm->y_buffer, ref->y_buffer, u, v, confidence,
+ frm->y_width, frm->y_height, frm->y_stride,
+ ref->y_stride, 1);
+
+ // avoid the boundaries because of artifacts
+ for (i = 10; i < frm->y_width - 10; ++i)
+ for (j = 10; j < frm->y_height - 10; ++j) {
+ // Note that this threshold could still benefit from tuning. Confidence
+ // is based on the inverse of the harris corner score.
+ if (confidence[i + j * frm->y_width] < 0.01) {
+ correspondences[num_correspondences].x = i;
+ correspondences[num_correspondences].y = j;
+ correspondences[num_correspondences].rx = i - u[i + j * frm->y_width];
+ correspondences[num_correspondences].ry = j - v[i + j * frm->y_width];
+ num_correspondences++;
+ }
+ }
+ printf("Number of correspondences = %d\n", num_correspondences);
+#ifdef VERBOSE
+ printf("Number of correspondences = %d\n", num_correspondences);
+#endif
+ inlier_map = (int *)malloc(num_correspondences * sizeof(*inlier_map));
+ num_inliers = compute_global_motion_multiple(type, correspondence_pts,
+ num_correspondences, H,
+ max_models, inlier_prob,
+ &num_models, inlier_map);
+#ifdef PARAM_SEARCH
+  for (j = 0; j < num_models; ++j) {
+    for (i = 0; i < get_numparams(type); ++i)
+      refine_param(type, frm->y_buffer, ref->y_buffer, H,
+                   i + j * get_numparams(type), frm->y_width, frm->y_height,
+                   frm->y_stride, 2);
+  }
+#endif
+
+#ifdef VERBOSE
+ printf("Models = %d, Inliers = %d\n", num_models, num_inliers);
+ if (num_models)
+ printf("Error Score (inliers) = %g\n",
+ compute_error_score(type, correspondences, 4, correspondences + 2, 4,
+ num_correspondences, H, inlier_map));
+ printf("Warp error score = %g\n",
+ vp9_warp_error_unq(ROTZOOM, H, ref->y_buffer, ref->y_crop_width,
+ ref->y_crop_height, ref->y_stride,
+ frm->y_buffer, 0, 0,
+ frm->y_crop_width, frm->y_crop_height,
+ frm->y_stride, 0, 0, 16, 16));
+#endif
+ (void) num_inliers;
+ free(correspondences);
+ free(inlier_map);
+ free(u);
+ free(v);
+ free(confidence);
+ return num_models;
+}
+
// Returns number of models actually returned
int vp9_compute_global_motion_multiple_feature_based(
struct VP9_COMP *cpi,
double *H) {
int num_frm_corners, num_ref_corners;
int num_correspondences;
- int *correspondences;
+ double *correspondences;
int num_inliers;
+ int i, j;
int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
int num_models = 0;
int *inlier_map = NULL;
(void) cpi;
+ (void) i;
+ (void) j;
#ifdef USE_FAST_CORNER
num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width,
printf("Reference corners = %d\n", num_ref_corners);
#endif
- correspondences = (int *) malloc(num_frm_corners * 4 *
+ correspondences = (double *) malloc(num_frm_corners * 4 *
sizeof(*correspondences));
num_correspondences = determine_correspondence(frm->y_buffer,
frm->y_width, frm->y_height,
frm->y_stride, ref->y_stride,
correspondences);
#ifdef VERBOSE
printf("Number of correspondences = %d\n", num_correspondences);
#endif
num_correspondences, H,
max_models, inlier_prob,
&num_models, inlier_map);
+#ifdef PARAM_SEARCH
+  for (j = 0; j < num_models; ++j) {
+    for (i = 0; i < get_numparams(type); ++i)
+      refine_param(type, frm->y_buffer, ref->y_buffer, H,
+                   i + j * get_numparams(type), frm->y_width, frm->y_height,
+                   frm->y_stride, 3);
+  }
+#endif
+
#ifdef VERBOSE
printf("Models = %d, Inliers = %d\n", num_models, num_inliers);
if (num_models)
double *H) {
VP9_COMMON *const cm = &cpi->common;
int num_correspondences = 0;
- int *correspondences;
+ double *correspondences;
int num_inliers;
int *inlier_map = NULL;
int bwidth = num_4x4_blocks_wide_lookup[bsize] << 2;
vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence);
- correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols *
+ correspondences = (double *)malloc(4 * cm->mb_rows * cm->mb_cols *
sizeof(*correspondences));
for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) {
double *H) {
VP9_COMMON *const cm = &cpi->common;
int num_correspondences = 0;
- int *correspondences;
+ double *correspondences;
int num_inliers;
int num_models = 0;
int *inlier_map = NULL;
double confidence[4096];
vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence);
- correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols *
+ correspondences = (double *)malloc(4 * cm->mb_rows * cm->mb_cols *
sizeof(*correspondences));
for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) {
--- /dev/null
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be
+ * found in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <time.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "vp9/common/vp9_motion_model.h"
+#include "vp9/encoder/vp9_opticalflow.h"
+#include "vp9/encoder/vp9_ransac.h"
+#include "vp9/encoder/vp9_resize.h"
+
+// ITERATIONS is the number of iterative refinements applied to an initial
+// flow estimate. 3 works well in practice, but the value is worth tuning.
+#define ITERATIONS 3
+#define MAX_MEDIAN_LENGTH 121
+#define MAX_LEVELS 8
+#define MIN_LEVEL_SIZE 30
+#define MAX_ERROR 0.00001
+#define BLUR_SIZE 11
+#define BLUR_SIGMA 1.5
+#define BLUR_AMP 1
+#define MEDIAN_FILTER_SIZE 5
+// Despite its name, this computes an absolute (not relative) error.
+#define RELATIVE_ERROR(a, b) (fabs((a) - (b)))
+#define WRAP_PIXEL(p, size) ((p) < 0 ? abs((p)) - 1 : \
+ ((p) >= (size) ? (2 * (size) - 1 - (p)) : (p)))
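+// For example, with this reflected-content padding: WRAP_PIXEL(-1, w) == 0,
+// WRAP_PIXEL(-2, w) == 1 and WRAP_PIXEL(w, w) == w - 1; in-range positions
+// are returned unchanged.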
+
+// Struct for an image pyramid
+typedef struct {
+ int n_levels;
+ int widths[MAX_LEVELS];
+ int heights[MAX_LEVELS];
+ int strides[MAX_LEVELS];
+ unsigned char **levels;
+} imPyramid;
+
+typedef struct {
+ int stride;
+ double *confidence;
+ double *u;
+ double *v;
+} flowMV;
+
+static void * safe_malloc(size_t size) {
+ void *v = malloc(size);
+ if (!v) {
+ fprintf(stderr, "Not enough memory\n");
+ exit(EXIT_FAILURE);
+ }
+ return v;
+}
+
+// Produces an image pyramid where each level is half the width and height
+// of the level below it.
+static void image_pyramid(unsigned char *img, imPyramid *pyr, int width,
+ int height, int stride, int n_levels) {
+ int i;
+ int max_levels = 1;
+ int scaled = width < height ? (width/MIN_LEVEL_SIZE) :
+ (height/MIN_LEVEL_SIZE);
+ pyr->widths[0] = width;
+ pyr->heights[0] = height;
+ pyr->strides[0] = stride;
+
+ if (n_levels == 1) {
+ pyr->n_levels = 1;
+ pyr->levels = (unsigned char**)safe_malloc(sizeof(*pyr->levels));
+ pyr->levels[0] = img;
+ return;
+ }
+ // compute number of possible levels of the pyramid. The smallest dim of the
+ // smallest level should be no less than 30 px.
+ while (scaled > 1) {
+ max_levels++;
+ scaled >>= 1;
+ }
+
+ if (n_levels > max_levels)
+ n_levels = max_levels;
+
+ pyr->n_levels = n_levels;
+ pyr->levels = (unsigned char**)safe_malloc(n_levels * sizeof(*pyr->levels));
+ pyr->levels[0] = img;
+
+ // compute each level
+ for (i = 1; i < n_levels; ++i) {
+ pyr->widths[i] = pyr->widths[i-1] >> 1;
+ pyr->heights[i] = pyr->heights[i-1] >> 1;
+ pyr->strides[i] = pyr->widths[i];
+ pyr->levels[i] = (unsigned char*)safe_malloc(pyr->widths[i] *
+ pyr->heights[i] *
+ sizeof(*pyr->levels[i]));
+
+ vp9_resize_plane(pyr->levels[i-1], pyr->heights[i-1], pyr->widths[i-1],
+ pyr->strides[i-1], pyr->levels[i], pyr->heights[i],
+ pyr->widths[i], pyr->strides[i]);
+ }
+}
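+
+// For example (illustrative numbers only): a 640x480 frame with
+// n_levels = 5 yields levels of 640x480, 320x240, 160x120, 80x60 and 40x30,
+// keeping the smallest dimension at or above MIN_LEVEL_SIZE.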
+
+static void destruct_pyramid(imPyramid *pyr) {
+ int i;
+
+ // start at level 1 because level 0 is a reference to the original image
+ for (i = 1; i < pyr->n_levels; ++i)
+ free(pyr->levels[i]);
+ free(pyr->levels);
+}
+
+// Convolution with reflected content padding to minimize edge artifacts.
+static void convolve_char(double *filter, unsigned char *image,
+ double *convolved, int filt_width, int filt_height,
+ int image_width, int image_height, int image_stride,
+ int convolved_stride) {
+ int i, j, x, y, imx, imy;
+ double pixel_val;
+ double filter_val;
+ double image_val;
+ int x_pad_size = filt_width >> 1;
+ int y_pad_size = filt_height >> 1;
+ for (j = 0; j < image_height; ++j)
+ for (i = 0; i < image_width; ++i) {
+ pixel_val = 0;
+      for (y = -y_pad_size; y <= y_pad_size; y++)
+        for (x = -x_pad_size; x <= x_pad_size; x++) {
+ filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
+ imx = WRAP_PIXEL(i + x, image_width);
+ imy = WRAP_PIXEL(j + y, image_height);
+ image_val = image[imx + imy * image_stride];
+ pixel_val += (filter_val * image_val);
+ }
+ convolved[i + j * convolved_stride] = pixel_val;
+ }
+}
+
+// Convolution with reflected content padding to minimize edge artifacts.
+static void convolve_double(double *filter, double *image, double *convolved,
+ int filt_width, int filt_height, int image_width,
+ int image_height, int image_stride,
+ int convolved_stride) {
+ int i, j, x, y, imx, imy;
+ double pixel_val;
+ double filter_val;
+ double image_val;
+ int x_pad_size = filt_width >> 1;
+ int y_pad_size = filt_height >> 1;
+ for (j = 0; j < image_height; ++j)
+ for (i = 0; i < image_width; ++i) {
+ pixel_val = 0;
+      for (y = -y_pad_size; y <= y_pad_size; y++)
+        for (x = -x_pad_size; x <= x_pad_size; x++) {
+ filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
+ imx = WRAP_PIXEL(i + x, image_width);
+ imy = WRAP_PIXEL(j + y, image_height);
+ image_val = image[imx + imy * image_stride];
+ pixel_val += (filter_val * image_val);
+ }
+ convolved[i + j * convolved_stride] = pixel_val;
+ }
+}
+
+// computes x and y spatial derivatives of an image.
+static void differentiate(double *img, int width, int height,
+ int stride, double *dx_img, double *dy_img,
+ int deriv_stride) {
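+  // A 5-tap central-difference derivative kernel (coefficient magnitudes of
+  // roughly 1/12 and 2/3).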
+ const double spatial_deriv_kernel[] = {-0.0833, 0.6667, 0.0,
+ -0.6667, 0.0833};
+ convolve_double(spatial_deriv_kernel, img, dx_img, 5, 1, width, height,
+ stride, deriv_stride);
+ convolve_double(spatial_deriv_kernel, img, dy_img, 1, 5, width, height,
+ stride, deriv_stride);
+}
+
+// Creates a normalized 2D Gaussian kernel.
+static void gaussian_kernel(double *mat, int size, double sig, double amp) {
+ int x, y;
+ double filter_val;
+ int half;
+ double denominator = 1 / (2.0 * sig * sig);
+  double sum = 0.0;
+
+ size |= 1;
+ half = size >> 1;
+  for (y = -half; y <= half; y++)
+    for (x = -half; x <= half; x++) {
+ filter_val = amp * exp((double)(-1.0 * ((x * x + y * y) * denominator)));
+ mat[(x + half) + ((y + half) * size)] = filter_val;
+ sum += filter_val;
+ }
+
+ // normalize the filter
+ if (sum > MAX_ERROR) {
+ sum = 1 / sum;
+ for (x = 0; x < size * size; x++) {
+ mat[x] = mat[x] * sum;
+ }
+ }
+}
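+
+// Worked example (values approximate): size = 3, sig = 1.5 and amp = 1 give
+// unnormalized weights of 1.00 at the center, 0.80 at the edge centers and
+// 0.64 at the corners; after normalization the kernel is roughly
+//   0.095 0.118 0.095
+//   0.118 0.148 0.118
+//   0.095 0.118 0.095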
+
+// Blurs an image with a Gaussian kernel.
+static void blur_img(unsigned char *img, int width, int height,
+ int stride, double *smooth_img, int smooth_stride,
+ int smoothing_size, double smoothing_sig,
+ double smoothing_amp) {
+ double *smoothing_kernel = (double *)safe_malloc(smoothing_size *
+ smoothing_size *
+ sizeof(*smoothing_kernel));
+ gaussian_kernel(smoothing_kernel, smoothing_size, smoothing_sig,
+ smoothing_amp);
+ convolve_char(smoothing_kernel, img, smooth_img, smoothing_size,
+ smoothing_size, width, height, stride, smooth_stride);
+ free(smoothing_kernel);
+}
+
+// Implementation of Hoare's select for linear time median selection.
+// Takes in a pointer to the beginning of a list of values, the length
+// of the list, an integer representing the index of the number to be
+// selected, and an unsigned integer used to seed a random number generator.
+static double selection(double *vals, int length, int k, unsigned int seed) {
+ int pivot_ind, x;
+ double pivot;
+ double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
+ int l = 0, e = 0, g = 0;
+  // rand_r() can return RAND_MAX, so scale by (length - 1) to keep the
+  // pivot index in bounds.
+  pivot_ind = (int) (((length - 1) / ((double)RAND_MAX)) * rand_r(&seed));
+ pivot = vals[pivot_ind];
+ for (x = 0; x < length; ++x) {
+ if (RELATIVE_ERROR(vals[x], pivot) <= MAX_ERROR) {
+ e++;
+ } else if (vals[x] < pivot) {
+ L[l] = vals[x];
+ l++;
+ } else if (vals[x] > pivot) {
+ G[g] = vals[x];
+ g++;
+ }
+ }
+
+ if (k <= l)
+ return selection(L, l, k, seed);
+ else if (k <= l + e)
+ return pivot;
+ else
+ return selection(G, g, k - l - e, seed);
+}
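+
+// For example, the median of 9 values is the element of 1-based rank
+// k = 5, i.e. selection(vals, 9, 5, seed).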
+
+// Performs median filtering for denoising. This implementation uses Hoare's
+// select to find the median within each block; block_x and block_y are
+// the x and y coordinates of the center of the block in the source image.
+static void median_filter(double *source, double *filtered, int block_size,
+ int width, int height, int source_stride,
+ int filtered_stride) {
+ int i, j, block_x, block_y, imx, imy;
+ double pivot, val;
+ int length = block_size * block_size;
+ double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
+  int k = (length >> 1) | 1;  // 1-based rank of the median (length is odd)
+ int l, e, g;
+ int pad_size = block_size >> 1;
+ unsigned int seed = (unsigned int)*source;
+
+ // find the median within each block. Reflected content is used for padding.
+ for (block_y = 0; block_y < height; ++block_y)
+ for (block_x = 0; block_x < width; ++block_x) {
+ l = 0, e = 0, g = 0;
+ memset(L, 0, length * sizeof(*L));
+ memset(G, 0, length * sizeof(*G));
+ pivot = source[block_x + block_y * source_stride];
+ for (j = -pad_size; j <= pad_size; ++j)
+ for (i = -pad_size; i <= pad_size; ++i) {
+ imx = WRAP_PIXEL(i + block_x, width);
+ imy = WRAP_PIXEL(j + block_y, height);
+ val = source[imx + imy * source_stride];
+
+          // The first iteration of selection() is pulled out here so we
+          // don't have to iterate through the block to flatten it and then
+          // iterate through those same values again to put them into the
+          // L/E/G bins in selection(). Ugly but more efficient.
+ if (RELATIVE_ERROR(val, pivot) <= MAX_ERROR) {
+ e++;
+ } else if (val < pivot) {
+ L[l] = val;
+ l++;
+ } else if (val > pivot) {
+ G[g] = val;
+ g++;
+ }
+ }
+ if (k <= l)
+ filtered[block_x + block_y * filtered_stride] =
+ selection(L, l, k, seed);
+ else if (k <= l + e)
+ filtered[block_x + block_y * filtered_stride] = pivot;
+ else
+ filtered[block_x + block_y * filtered_stride] = selection(G, g, k -
+ l - e, seed);
+ }
+}
+
+static inline void pointwise_matrix_sub(double *mat1, double *mat2,
+ double *diff, int width, int height,
+ int stride1, int stride2,
+ int diffstride) {
+ int i, j;
+ for (j = 0; j < height; ++j)
+ for (i = 0; i < width; ++i) {
+ diff[i + j * diffstride] = mat1[i + j * stride1] - mat2[i + j * stride2];
+ }
+}
+
+static inline void pointwise_matrix_add(double *mat1, double *mat2,
+ double *sum, int width, int height,
+ int stride1, int stride2,
+ int sumstride) {
+ int i, j;
+ for (j = 0; j < height; ++j)
+ for (i = 0; i < width; ++i) {
+ sum[i + j * sumstride] = mat1[i + j * stride1] + mat2[i + j * stride2];
+ }
+}
+
+// Solves the Lucas-Kanade equation at a given pixel in the image using a
+// local neighborhood for support. loc_x and loc_y are the x and y components
+// of the center of the local neighborhood. Assumes dx, dy and dt have the
+// same stride. window is a wind_size x wind_size set of weights for the
+// neighborhood surrounding (loc_x, loc_y).
+static void optical_flow_per_pixel(double *dx, double *dy, double *dt,
+ double *window, int wind_size,
+ flowMV *flow, int width, int height,
+ int stride, int loc_x, int loc_y) {
+ int i, j, iw, jw, im_ind;
+ double g = 0;
+  // M and b form the normal equations M * a = b, where a = (u, v) holds the
+  // desired optical flow parameters:
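+  //
+  //     M = sum over window of g * [dx*dx  dx*dy]
+  //                                [dx*dy  dy*dy]
+  //
+  //     b = sum over window of g * [-dx*dt]
+  //                                [-dy*dt]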
+ double M[4] = {0, 0, 0, 0};
+ double b[2] = {0, 0};
+ double det = 0;
+ double trace2 = 0;
+ double corner_score = 0;
+ int step = wind_size >> 1;
+ double gdx = 0, gdy = 0;
+
+ for (j = loc_y - step; j <= loc_y + step; ++j)
+ for (i = loc_x - step; i <= loc_x + step; ++i) {
+ // if pixel is out of bounds, use reflected image content
+ iw = WRAP_PIXEL(i, width);
+ jw = WRAP_PIXEL(j, height);
+ im_ind = iw + jw * stride;
+ g = window[(i - loc_x + step) + (j - loc_y + step) * wind_size];
+ gdx = g * dx[im_ind];
+ gdy = g * dy[im_ind];
+ M[0] += gdx * dx[im_ind];
+ M[1] += gdx * dy[im_ind];
+ M[3] += gdy * dy[im_ind];
+ b[0] += -gdx * dt[im_ind];
+ b[1] += -gdy * dt[im_ind];
+ }
+ M[2] = M[1];
+ det = (M[0] * M[3]) - (M[1] * M[1]);
+ trace2 = (M[0] + M[3]) * (M[0] + M[3]);
+ if (RELATIVE_ERROR(det, 0) > MAX_ERROR) {
+ const double det_inv = 1 / det;
+ const double mult_b0 = det_inv * b[0];
+ const double mult_b1 = det_inv * b[1];
+ flow->u[loc_x + loc_y * flow->stride] = M[3] * mult_b0 - M[1] * mult_b1;
+ flow->v[loc_x + loc_y * flow->stride] = -M[2] * mult_b0 + M[0] * mult_b1;
+ } else {
+ if (M[0] == 0 && M[3] == 0) {
+ flow->u[loc_x + loc_y * flow->stride] = 0;
+ flow->v[loc_x + loc_y * flow->stride] = 0;
+ } else {
+ const double trace2_inv = 1 / trace2;
+ const double mult_b0 = trace2_inv * b[0];
+ const double mult_b1 = trace2_inv * b[1];
+ flow->u[loc_x + loc_y * flow->stride] = M[0] * mult_b0 + M[1] * mult_b1;
+ flow->v[loc_x + loc_y * flow->stride] = M[2] * mult_b0 + M[3] * mult_b1;
+ }
+ }
+  // Compute the inverse Harris corner score as the confidence metric: a
+  // strong corner yields a small value (callers treat small values as
+  // reliable), and the score is clamped at 2.
+  corner_score = 1 / (det - (0.01 * trace2));
+  flow->confidence[loc_x + loc_y * flow->stride] = corner_score > 2 ?
+                                                   2 : corner_score;
+}
+
+// Computes Lucas-Kanade optical flow. Note that this assumes the input
+// images have already been denoised and differentiated.
+static void lucas_kanade_base(double *smooth_frm, double *smooth_ref,
+ double *dx, double *dy, double *dt, flowMV *flow,
+ int width, int height, int smooth_frm_stride,
+ int smooth_ref_stride, int deriv_stride) {
+ int i, j;
+ const int local_neighborhood_sz = 5;
+ // windowing function for local neighborhood weights
+ const double window[] = {0.0039, 0.0156, 0.0234, 0.0156, 0.0039,
+ 0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
+ 0.0234, 0.0938, 0.1406, 0.0938, 0.0234,
+ 0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
+ 0.0039, 0.0156, 0.0234, 0.0156, 0.0039};
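+  // (This window is the outer product of the binomial kernel [1 4 6 4 1]/16
+  // with itself, i.e. a separable near-Gaussian weighting.)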
+ // compute temporal derivative
+ pointwise_matrix_sub(smooth_ref, smooth_frm, dt, width, height,
+ smooth_ref_stride, smooth_frm_stride, deriv_stride);
+
+ for (j = 0; j < height; ++j)
+ for (i = 0; i < width; ++i) {
+ optical_flow_per_pixel(dx, dy, dt, window, local_neighborhood_sz,
+ flow, width, height, deriv_stride, i, j);
+ }
+}
+
+// Improves an initial approximation for the vector field by iteratively
+// warping one image towards the other.
+static void iterative_refinement(unsigned char *ref, double *smooth_frm,
+ double *dx, double *dy, double *dt,
+ flowMV *flow, int width, int height,
+ int ref_stride, int smooth_frm_stride,
+ int deriv_stride, int n_refinements) {
+ int i, j, n;
+ double x, y;
+ unsigned char *estimate = (unsigned char*)safe_malloc(width * height *
+ sizeof(*estimate));
+ double *smooth_estimate = (double*)safe_malloc(width * height *
+ sizeof(*smooth_estimate));
+ flowMV new_flow;
+ new_flow.u = (double*)safe_malloc(width * height * sizeof(*new_flow.u));
+ new_flow.v = (double*)safe_malloc(width * height * sizeof(*new_flow.v));
+ new_flow.confidence = (double*)safe_malloc(width * height *
+ sizeof(*new_flow.confidence));
+ new_flow.stride = width;
+ // warp one image toward the other
+ for (n = 0; n < n_refinements; ++n) {
+ for (j = 0; j < height; ++j)
+ for (i = 0; i < width; ++i) {
+ x = i - flow->u[i + j * flow->stride];
+ y = j - flow->v[i + j * flow->stride];
+ estimate[i + j * width] = interpolate(ref, x, y, width,
+ height, ref_stride);
+ }
+
+ // compute flow between frame and warped estimate
+ blur_img(estimate, width, height, width, smooth_estimate, width, BLUR_SIZE,
+ BLUR_SIGMA, BLUR_AMP);
+ lucas_kanade_base(smooth_frm, smooth_estimate, dx, dy, dt, &new_flow,
+ width, height, smooth_frm_stride, width, deriv_stride);
+
+ // add residual and apply median filter for denoising
+ pointwise_matrix_add(flow->u, new_flow.u, new_flow.u, width, height,
+ flow->stride, new_flow.stride, new_flow.stride);
+ pointwise_matrix_add(flow->v, new_flow.v, new_flow.v, width, height,
+ flow->stride, new_flow.stride, new_flow.stride);
+ median_filter(new_flow.u, flow->u, MEDIAN_FILTER_SIZE, width, height,
+ new_flow.stride, flow->stride);
+ median_filter(new_flow.v, flow->v, MEDIAN_FILTER_SIZE, width, height,
+ new_flow.stride, flow->stride);
+ median_filter(new_flow.confidence, flow->confidence, MEDIAN_FILTER_SIZE,
+ width, height, new_flow.stride, flow->stride);
+ }
+
+ free(smooth_estimate);
+ free(estimate);
+ free(new_flow.u);
+ free(new_flow.v);
+ free(new_flow.confidence);
+}
+
+// Top-level interface for computing dense optical flow between two frames.
+// With n_levels == 1 plain iterative Lucas-Kanade is used; with
+// n_levels > 1 a coarse-to-fine image pyramid is used.
+void compute_flow(unsigned char *frm, unsigned char *ref,
+ double *u, double *v, double *confidence,
+ int width, int height, int frm_stride,
+ int ref_stride, int n_levels) {
+ double *smooth_ref = (double*)safe_malloc(width * height *
+ sizeof(*smooth_ref));
+ double *smooth_frm = (double*)safe_malloc(width * height *
+ sizeof(*smooth_frm));
+ double *dx_frm = (double*)safe_malloc(width * height *
+ sizeof(*dx_frm));
+  double *dy_frm = (double*)safe_malloc(width * height *
+                                        sizeof(*dy_frm));
+ double *dt = (double *)safe_malloc(width * height * sizeof(*dt));
+ flowMV flow;
+ flow.u = u;
+ flow.v = v;
+ flow.confidence = confidence;
+ flow.stride = width;
+ if (n_levels == 1) {
+ // Uses lucas kanade and iterative estimation without coarse to fine.
+ // This is more efficient for small motion but becomes less accurate
+ // as motion gets larger.
+ blur_img(ref, width, height, ref_stride, smooth_ref, width, BLUR_SIZE,
+ BLUR_SIGMA, BLUR_AMP);
+ blur_img(frm, width, height, frm_stride, smooth_frm, width, BLUR_SIZE,
+ BLUR_SIGMA, BLUR_AMP);
+ differentiate(smooth_frm, width, height, width, dx_frm, dy_frm, width);
+ lucas_kanade_base(smooth_frm, smooth_ref, dx_frm, dy_frm, dt, &flow,
+ width, height, width, width, width);
+ iterative_refinement(ref, smooth_frm, dx_frm, dy_frm, dt, &flow, width,
+ height, ref_stride, width, width, ITERATIONS);
+ } else {
+ int i, j, k;
+ // w, h, s_r, s_f are intermediate width, height, ref stride, frame stride
+ // as the resolution changes up the pyramid
+ int w, h, s_r, s_f, w_upscale, h_upscale, s_upscale;
+ double x, y;
+ double *smooth_frm_approx = (double*)safe_malloc(width * height *
+ sizeof(*smooth_frm_approx));
+ double *dx_frm_approx = (double*)safe_malloc(width * height *
+ sizeof(*dx_frm_approx));
+ double *dy_frm_approx = (double*)safe_malloc(width * height *
+ sizeof(*dy_frm_approx));
+ double *u_upscale = (double*)safe_malloc(width * height *
+ sizeof(*u_upscale));
+ double *v_upscale = (double*)safe_malloc(width * height *
+ sizeof(*v_upscale));
+ unsigned char *frm_approx = (unsigned char*)safe_malloc(width * height *
+ sizeof(*frm_approx));
+ imPyramid *frm_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
+ imPyramid *ref_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
+ // create image pyramids
+ image_pyramid(frm, frm_pyramid, width, height, frm_stride, n_levels);
+ image_pyramid(ref, ref_pyramid, width, height, ref_stride, n_levels);
+ n_levels = frm_pyramid->n_levels;
+ w = frm_pyramid->widths[n_levels - 1];
+ h = frm_pyramid->heights[n_levels - 1];
+ s_r = ref_pyramid->strides[n_levels - 1];
+ s_f = frm_pyramid->strides[n_levels - 1];
+
+ // for each level in the pyramid starting with the coarsest
+ for (i = n_levels - 1; i >= 0; --i) {
+ assert(frm_pyramid->widths[i] == ref_pyramid->widths[i]);
+ assert(frm_pyramid->heights[i] == ref_pyramid->heights[i]);
+ blur_img(ref_pyramid->levels[i], w, h, s_r, smooth_ref, width, BLUR_SIZE,
+ BLUR_SIGMA, BLUR_AMP);
+ blur_img(frm_pyramid->levels[i], w, h, s_f, smooth_frm, width, BLUR_SIZE,
+ BLUR_SIGMA, BLUR_AMP);
+ differentiate(smooth_frm, w, h, width, dx_frm, dy_frm, width);
+
+      // Compute optical flow at this level between the reference frame and
+      // the estimate produced from warping. If this is the first iteration
+      // (meaning no warping has happened yet) then we have no approximation
+      // for the frame and only have to worry about flow between the original
+      // frame and reference. Every subsequent iteration requires computing
+      // an estimate for the frame based on previously computed flow vectors.
+ if (i < n_levels - 1) {
+ blur_img(frm_approx, w, h, width, smooth_frm_approx, width, BLUR_SIZE,
+ BLUR_SIGMA, BLUR_AMP);
+ differentiate(smooth_frm_approx, w, h, width, dx_frm_approx,
+ dy_frm_approx, width);
+ lucas_kanade_base(smooth_frm_approx, smooth_ref, dx_frm_approx,
+ dy_frm_approx, dt, &flow, w, h, width, width, width);
+ pointwise_matrix_add(flow.u, u_upscale, u_upscale, w, h, flow.stride,
+ width, width);
+ pointwise_matrix_add(flow.v, v_upscale, v_upscale, w, h, flow.stride,
+ width, width);
+ median_filter(u_upscale, flow.u, MEDIAN_FILTER_SIZE, w, h, width,
+ flow.stride);
+ median_filter(v_upscale, flow.v, MEDIAN_FILTER_SIZE, w, h, width,
+ flow.stride);
+ } else {
+ lucas_kanade_base(smooth_frm, smooth_ref, dx_frm,
+ dy_frm, dt, &flow, w, h, width, width, width);
+ }
+ iterative_refinement(ref_pyramid->levels[i], smooth_frm, dx_frm, dy_frm,
+ dt, &flow, w, h, s_r, width, width, ITERATIONS);
+
+ // if we're at the finest level, we're ready to return u and v
+ if (i == 0) {
+ assert(w == width);
+ assert(h == height);
+ destruct_pyramid(frm_pyramid);
+ destruct_pyramid(ref_pyramid);
+ free(frm_pyramid);
+ free(ref_pyramid);
+ free(frm_approx);
+ free(smooth_frm_approx);
+ free(dx_frm_approx);
+ free(dy_frm_approx);
+ free(u_upscale);
+ free(v_upscale);
+ break;
+ }
+
+ w_upscale = ref_pyramid->widths[i - 1];
+ h_upscale = ref_pyramid->heights[i - 1];
+ s_upscale = ref_pyramid->strides[i - 1];
+
+      // Warp the image according to the optical flow estimate. The flow
+      // field is upsampled to initialize the next finer level; each coarse
+      // vector is doubled to match the 2x change in resolution.
+      for (j = 0; j < h_upscale; ++j)
+        for (k = 0; k < w_upscale; ++k) {
+          u_upscale[k + j * w_upscale] = 2 * flow.u[(k >> 1) +
+                                                    (j >> 1) * flow.stride];
+          v_upscale[k + j * w_upscale] = 2 * flow.v[(k >> 1) +
+                                                    (j >> 1) * flow.stride];
+          x = k - u_upscale[k + j * w_upscale];
+          y = j - v_upscale[k + j * w_upscale];
+          frm_approx[k + j * width] = interpolate(ref_pyramid->levels[i - 1],
+                                                  x, y, w_upscale, h_upscale,
+                                                  s_upscale);
+        }
+
+ // assign dimensions for next level
+ w = frm_pyramid->widths[i - 1];
+ h = frm_pyramid->heights[i - 1];
+ s_r = ref_pyramid->strides[i - 1];
+ s_f = frm_pyramid->strides[i - 1];
+ }
+ }
+ free(smooth_ref);
+ free(smooth_frm);
+ free(dx_frm);
+ free(dy_frm);
+ free(dt);
+}
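+
+// Illustrative usage (a sketch; the caller in vp9_global_motion.c passes
+// n_levels = 1, while n_levels > 1 enables the coarse-to-fine path). u, v
+// and confidence each hold width * height doubles:
+//
+//   compute_flow(frm->y_buffer, ref->y_buffer, u, v, confidence,
+//                frm->y_width, frm->y_height, frm->y_stride,
+//                ref->y_stride, 3);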