2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
15 #include "./vpx_config.h"
17 #include "vpx_mem/vpx_mem.h"
19 #include "vp9/common/vp9_common.h"
21 #include "vp9/encoder/vp9_onyx_int.h"
22 #include "vp9/encoder/vp9_mcomp.h"
24 // #define NEW_DIAMOND_SEARCH
// Returns a pointer to the sample in buf->buf addressed by mv: mv->row rows of
// buf->stride samples down, then mv->col samples across.
26 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
28 return &buf->buf[mv->row * buf->stride + mv->col];
// Shrinks the search window stored in x (mv_col_min/max, mv_row_min/max) to
// its intersection with a window of +/- MAX_FULL_PEL_VAL around mv.
// mv is shifted right by 3 to drop its three fractional bits; when the
// dropped bits are nonzero the lower bound is raised by one. The window is
// additionally limited to [(MV_LOW >> 3) + 1, (MV_UPP >> 3) - 1].
// The limits in x are only ever tightened, never widened.
31 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
32 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
33 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
34 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
35 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
37 col_min = MAX(col_min, (MV_LOW >> 3) + 1);
38 row_min = MAX(row_min, (MV_LOW >> 3) + 1);
39 col_max = MIN(col_max, (MV_UPP >> 3) - 1);
40 row_max = MIN(row_max, (MV_UPP >> 3) - 1);
42 // Get intersection of UMV window and valid MV window to reduce # of checks
44 if (x->mv_col_min < col_min)
45 x->mv_col_min = col_min;
46 if (x->mv_col_max > col_max)
47 x->mv_col_max = col_max;
48 if (x->mv_row_min < row_min)
49 x->mv_row_min = row_min;
50 if (x->mv_row_max > row_max)
51 x->mv_row_max = row_max;
// Derives a step value sr from the block size: the loop runs while
// (size << sr) is still below MAX_FULL_PEL_VAL, then sr is increased by
// cpi->sf.reduce_first_step_size and capped at
// cpi->sf.max_step_search_steps - 2.
// NOTE(review): the declaration of sr, the loop body, the "minimum search
// size" adjustment and the return statement are not visible in this excerpt.
54 int vp9_init_search_range(VP9_COMP *cpi, int size) {
57 // Minimum search size no matter what the passed in value.
60 while ((size << sr) < MAX_FULL_PEL_VAL)
63 sr += cpi->sf.reduce_first_step_size;
64 sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
// Sums the joint-class cost joint_cost[vp9_get_mv_joint(mv)] with the
// per-component costs comp_cost[0][mv->row] and comp_cost[1][mv->col].
// The component tables are indexed directly with the signed components, so
// presumably each pointer addresses the middle of its array - TODO confirm.
68 static INLINE int mv_cost(const MV *mv,
69 const int *joint_cost, int *comp_cost[2]) {
70 return joint_cost[vp9_get_mv_joint(mv)] +
71 comp_cost[0][mv->row] + comp_cost[1][mv->col];
// Cost of coding mv relative to ref: mv_cost() of the difference vector,
// multiplied by weight and reduced with ROUND_POWER_OF_TWO(..., 7).
// NOTE(review): the second initializer of diff is on a line not visible in
// this excerpt.
74 int vp9_mv_bit_cost(const MV *mv, const MV *ref,
75 const int *mvjcost, int *mvcost[2], int weight) {
76 const MV diff = { mv->row - ref->row,
78 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
// Rate term added to a distortion score: built from mv_cost() of the
// difference between mv and ref, passed through ROUND_POWER_OF_TWO.
// NOTE(review): the remaining parameter(s), the multiplier and the shift
// amount are on lines not visible in this excerpt.
81 static int mv_err_cost(const MV *mv, const MV *ref,
82 const int *mvjcost, int *mvcost[2],
85 const MV diff = { mv->row - ref->row,
87 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
// Rate term added to a SAD score: built from mv_cost() of the difference
// between mv and ref using the SAD cost tables, passed through
// ROUND_POWER_OF_TWO.
// NOTE(review): the remaining parameter(s), the multiplier and the shift
// amount are on lines not visible in this excerpt.
93 static int mvsad_err_cost(const MV *mv, const MV *ref,
94 const int *mvjsadcost, int *mvsadcost[2],
97 const MV diff = { mv->row - ref->row,
99 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
// Builds the search-site table x->ss for a 4-point (diamond) step search.
// Entry 0 is the zero vector. For each step length len, starting at
// MAX_FIRST_STEP and halving down to 1, four sites are appended in the order
// {-len, 0}, {len, 0}, {0, -len}, {0, len}; each site's offset is
// mv.row * stride + mv.col. The total is stored in x->ss_count and
// x->searches_per_step is set to 4.
// NOTE(review): the line copying ss_mvs[i] into the site is not visible in
// this excerpt.
105 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
106 int len, ss_count = 1;
108 x->ss[0].mv.col = x->ss[0].mv.row = 0;
111 for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
112 // Generate offsets for 4 search sites per step.
113 const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
115 for (i = 0; i < 4; ++i) {
116 search_site *const ss = &x->ss[ss_count++];
118 ss->offset = ss->mv.row * stride + ss->mv.col;
122 x->ss_count = ss_count;
123 x->searches_per_step = 4;
// Builds the search-site table x->ss for an 8-point step search.
// Entry 0 is the zero vector. For each step length len, starting at
// MAX_FIRST_STEP and halving down to 1, eight sites are appended: the four
// axis-aligned offsets followed by the four diagonal ones. Each site's offset
// is mv.row * stride + mv.col. The total is stored in x->ss_count and
// x->searches_per_step is set to 8.
// NOTE(review): the line copying ss_mvs[i] into the site is not visible in
// this excerpt.
126 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
127 int len, ss_count = 1;
129 x->ss[0].mv.col = x->ss[0].mv.row = 0;
132 for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
133 // Generate offsets for 8 search sites per step.
134 const MV ss_mvs[8] = {
135 {-len, 0 }, {len, 0 }, { 0, -len}, {0, len},
136 {-len, -len}, {-len, len}, {len, -len}, {len, len}
139 for (i = 0; i < 8; ++i) {
140 search_site *const ss = &x->ss[ss_count++];
142 ss->offset = ss->mv.row * stride + ss->mv.col;
146 x->ss_count = ss_count;
147 x->searches_per_step = 8;
151 * To avoid the penalty for crossing cache-line read, preload the reference
152 * area in a small buffer, which is aligned to make sure there won't be crossing
153 * cache-line read while reading from this buffer. This reduced the cpu
154 * cycles spent on reading ref data in sub-pixel filter functions.
155 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
156 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
157 * could reduce the area.
// Cost macro tail. The visible expression adds the joint cost selected by
// ((r) != rr) * 2 + ((c) != rc) to the component costs of the offset from
// (rr, rc), multiplies by error_per_bit, adds 4096 and shifts right by 13.
// It relies on rr, rc, mvjcost, mvcost and error_per_bit being in scope at
// the point of use.
// NOTE(review): the #define line (presumably MVC(r, c), as used by
// CHECK_BETTER) and the condition guarding the ": 0" arm are not visible in
// this excerpt.
160 /* estimated cost of a motion vector (r,c) */
163 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
164 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
165 error_per_bit + 4096) >> 13 : 0)
// NOTE(review): only the signature is visible in this excerpt; the body that
// performs the conversion described below is missing.
168 // convert motion vector component to offset for svf calc
169 static INLINE int sp(int x) {
// Returns the address in buf of the whole-sample position for coordinates
// (r, c) that carry three fractional bits: both are shifted right by 3
// before being combined with stride.
173 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
174 return &buf[(r >> 3) * stride + (c >> 3)];
// Distortion macro tail: calls vfp->svf on the reference position given by
// pre(y, y_stride, r, c) with sub-sample offsets sp(c), sp(r) against the
// source block z. Relies on vfp, y, y_stride and z being in scope.
// NOTE(review): the #define line (presumably DIST(r, c), as used by
// CHECK_BETTER) and the remaining arguments are not visible in this excerpt.
177 /* returns subpixel variance error function */
179 vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
// Evaluates candidate (r, c) only when it lies inside [minr, maxr] x
// [minc, maxc]. The score v is MVC(r, c) + DIST(r, c); when v is below
// besterr the visible code records the distortion in *distortion.
// Relies on minc, maxc, minr, maxr, thismse, besterr and distortion being in
// scope. Note that r and c are evaluated several times.
// NOTE(review): the other updates on success and the out-of-range branch are
// not visible in this excerpt.
182 /* checks if (r, c) has better score than previous best */
183 #define CHECK_BETTER(v, r, c) \
184 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
185 thismse = (DIST(r, c)); \
186 if ((v = MVC(r, c) + thismse) < besterr) { \
190 *distortion = thismse; \
// First refinement pass around (tr, tc) at distance hstep: checks the left,
// right, up and down neighbours, encodes the cheaper side of each axis in
// whichdir (bit 0: right not worse than left... i.e. 0 = left, 1 = right;
// +2 when down is chosen over up), then checks the one diagonal neighbour
// selected by whichdir.
// Relies on tr, tc, hstep and whichdir being in scope.
// NOTE(review): the case labels and closing lines are not visible in this
// excerpt.
197 #define FIRST_LEVEL_CHECKS \
199 unsigned int left, right, up, down, diag; \
200 CHECK_BETTER(left, tr, tc - hstep); \
201 CHECK_BETTER(right, tr, tc + hstep); \
202 CHECK_BETTER(up, tr - hstep, tc); \
203 CHECK_BETTER(down, tr + hstep, tc); \
204 whichdir = (left < right ? 0 : 1) + \
205 (up < down ? 0 : 2); \
206 switch (whichdir) { \
208 CHECK_BETTER(diag, tr - hstep, tc - hstep); \
211 CHECK_BETTER(diag, tr - hstep, tc + hstep); \
214 CHECK_BETTER(diag, tr + hstep, tc - hstep); \
217 CHECK_BETTER(diag, tr + hstep, tc + hstep); \
// Second refinement pass. Compares the pass's starting point (tr, tc) with
// the current best (br, bc) and checks additional candidates chosen by which
// of the two coordinates changed: both, column only, or row only. In the
// single-axis cases whichdir selects one further candidate.
// Relies on tr, tc, br, bc, kr, kc, hstep and whichdir being in scope.
// NOTE(review): the assignments to kr/kc, the case labels and the closing
// lines are not visible in this excerpt.
222 #define SECOND_LEVEL_CHECKS \
225 unsigned int second; \
226 if (tr != br && tc != bc) { \
229 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
230 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
231 } else if (tr == br && tc != bc) { \
233 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
234 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
235 switch (whichdir) { \
238 CHECK_BETTER(second, tr + hstep, tc + kc); \
242 CHECK_BETTER(second, tr - hstep, tc + kc); \
245 } else if (tr != br && tc == bc) { \
247 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
248 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
249 switch (whichdir) { \
252 CHECK_BETTER(second, tr + kr, tc + hstep); \
256 CHECK_BETTER(second, tr + kr, tc - hstep); \
// Sub-sample refinement of the motion vector in *bestmv against ref_mv.
//
// Visible steps:
//  - bestmv is used as a whole-sample vector on entry (it indexes the
//    reference plane directly via offset); br/bc hold it scaled by 8.
//  - The allowed window [minr, maxr] x [minc, maxc] is the intersection of
//    x's mv limits scaled by 8 with ref_mv +/- MV_MAX.
//  - The centre score is vfp->vf at the starting position; it is written to
//    *distortion and *sse1, then mv_err_cost relative to ref_mv is added.
//  - Finer stages are gated by forced_stop (see the note below) and, for the
//    finest one, by allow_hp && vp9_use_mv_hp(ref_mv).
//  - Finally |bestmv - ref_mv| is compared per component against
//    MAX_FULL_PEL_VAL << 3.
// NOTE(review): several parameters, the stage bodies, the write-back to
// *bestmv, the action taken by the final range check and the return value
// are on lines not visible in this excerpt.
262 int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
263 MV *bestmv, const MV *ref_mv,
266 const vp9_variance_fn_ptr_t *vfp,
269 int *mvjcost, int *mvcost[2],
271 unsigned int *sse1) {
272 const uint8_t *const z = x->plane[0].src.buf;
273 const int src_stride = x->plane[0].src.stride;
274 const MACROBLOCKD *xd = &x->e_mbd;
275 unsigned int besterr = INT_MAX;
277 unsigned int whichdir;
279 unsigned int halfiters = iters_per_step;
280 unsigned int quarteriters = iters_per_step;
281 unsigned int eighthiters = iters_per_step;
283 const int y_stride = xd->plane[0].pre[0].stride;
284 const int offset = bestmv->row * y_stride + bestmv->col;
285 const uint8_t *const y = xd->plane[0].pre[0].buf;
287 int rr = ref_mv->row;
288 int rc = ref_mv->col;
289 int br = bestmv->row * 8;
290 int bc = bestmv->col * 8;
292 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
293 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
294 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
295 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
304 // calculate central point error
305 besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
306 *distortion = besterr;
307 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
317 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
318 if (forced_stop != 2) {
321 if (quarteriters > 1) {
328 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
331 if (eighthiters > 1) {
337 // These lines ensure static analysis doesn't warn that
338 // tr and tc aren't used after the above point.
345 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
346 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
// Distortion macro tail for the compound case: same addressing as the
// single-reference variant, but calls vfp->svaf and passes second_pred as an
// extra argument; the squared error is written to the local sse.
// NOTE(review): the #define line (and any preceding #undef) is not visible
// in this excerpt.
353 /* returns subpixel variance error function */
355 vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
356 z, src_stride, &sse, second_pred)
// Sub-sample refinement of *bestmv for a compound prediction that is
// averaged with second_pred.
//
// Visible steps:
//  - bestmv is used as a whole-sample vector on entry; br/bc hold it scaled
//    by 8, and the allowed window is the intersection of x's mv limits
//    scaled by 8 with ref_mv +/- MV_MAX.
//  - The centre score is computed on comp_pred, a 64 * 64 scratch buffer
//    filled by vp9_comp_avg_pred from second_pred and the reference block
//    (w x h, stride w); vfp->vf of that against the source is written to
//    *distortion and *sse1, then mv_err_cost relative to ref_mv is added.
//  - Finer stages are gated by forced_stop and, for the finest one, by
//    allow_hp && vp9_use_mv_hp(ref_mv).
//  - Finally |bestmv - ref_mv| is compared per component against
//    MAX_FULL_PEL_VAL << 3.
// NOTE(review): several parameters (including w and h), the stage bodies,
// the write-back to *bestmv, the action of the final range check and the
// return value are on lines not visible in this excerpt.
358 int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
359 MV *bestmv, const MV *ref_mv,
362 const vp9_variance_fn_ptr_t *vfp,
365 int *mvjcost, int *mvcost[2],
368 const uint8_t *second_pred,
370 const uint8_t *const z = x->plane[0].src.buf;
371 const int src_stride = x->plane[0].src.stride;
372 const MACROBLOCKD *xd = &x->e_mbd;
373 unsigned int besterr = INT_MAX;
375 unsigned int whichdir;
377 const unsigned int halfiters = iters_per_step;
378 const unsigned int quarteriters = iters_per_step;
379 const unsigned int eighthiters = iters_per_step;
381 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
382 const int y_stride = xd->plane[0].pre[0].stride;
383 const int offset = bestmv->row * y_stride + bestmv->col;
384 const uint8_t *const y = xd->plane[0].pre[0].buf;
386 int rr = ref_mv->row;
387 int rc = ref_mv->col;
388 int br = bestmv->row * 8;
389 int bc = bestmv->col * 8;
391 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
392 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
393 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
394 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
403 // calculate central point error
404 // TODO(yunqingwang): central pointer error was already calculated in full-
405 // pixel search, and can be passed in this function.
406 vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
407 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
408 *distortion = besterr;
409 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
411 // Each subsequent iteration checks at least one point in
412 // common with the last iteration could be 2 ( if diag selected)
421 // Each subsequent iteration checks at least one point in common with
422 // the last iteration could be 2 ( if diag selected) 1/4 pel
424 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
425 if (forced_stop != 2) {
428 if (quarteriters > 1) {
435 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
438 if (eighthiters > 1) {
444 // These lines ensure static analysis doesn't warn that
445 // tr and tc aren't used after the above point.
452 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
453 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
// Returns nonzero when the square of +/- range around (row, col) lies
// entirely within x's mv limits. The four comparisons are combined with
// bitwise &, so all of them are always evaluated (no short-circuit).
464 static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
466 return ((row - range) >= x->mv_row_min) &
467 ((row + range) <= x->mv_row_max) &
468 ((col - range) >= x->mv_col_min) &
469 ((col + range) <= x->mv_col_max);
// Returns nonzero when mv lies inside x's mv limits; both ends of the row
// and column ranges are inclusive.
472 static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
473 return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
474 (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
// SAD variant of CHECK_BETTER used by the pattern search. The vector cost
// (mvsad_err_cost relative to fcenter_mv) is added to thissad only when the
// raw SAD already beats bestsad, and the sum is then compared again.
// Relies on thissad, bestsad, this_mv, fcenter_mv, mvjsadcost, mvsadcost and
// sad_per_bit being in scope.
// NOTE(review): the updates performed on success are not visible in this
// excerpt.
477 #define CHECK_BETTER \
479 if (thissad < bestsad) {\
481 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
482 mvjsadcost, mvsadcost, sad_per_bit);\
483 if (thissad < bestsad) {\
// Dimensions of the candidate tables passed to vp9_pattern_search().
490 #define MAX_PATTERN_SCALES 11
491 #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
492 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
//
// Visible flow:
//  1. ref_mv is clamped in place to x's mv limits and its SAD plus vector
//     cost (relative to center_mv >> 3) becomes the initial bestsad.
//  2. When do_init_search is set, every scale t in 0..s is tried around the
//     start point and the best scale/candidate moves the centre.
//  3. While a scale remains (best_init_s != -1), all candidates of the
//     scale are checked once, then after each move only the three
//     candidates cyclically adjacent to the winning index k
//     (next_chkpts_indices) are checked, until no candidate improves.
//  4. A final stage checks the four 1-away neighbours for up to 16 rounds
//     (guarded by do_refine according to the comment below).
// At each stage check_bounds() selects between a loop that evaluates every
// candidate and a loop that first filters candidates with is_mv_in().
// NOTE(review): part of the signature, the CHECK_BETTER invocations, the
// scale-stepping logic, the write to best_mv and the return value are on
// lines not visible in this excerpt.
498 static int vp9_pattern_search(const MACROBLOCK *x,
502 int do_init_search, int do_refine,
503 const vp9_variance_fn_ptr_t *vfp,
505 const MV *center_mv, MV *best_mv,
506 const int num_candidates[MAX_PATTERN_SCALES],
507 const MV candidates[MAX_PATTERN_SCALES]
508 [MAX_PATTERN_CANDIDATES]) {
509 const MACROBLOCKD *const xd = &x->e_mbd;
510 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
511 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
514 const struct buf_2d *const what = &x->plane[0].src;
515 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
517 int bestsad = INT_MAX;
520 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
521 int best_init_s = search_param_to_steps[search_param];
522 const int *const mvjsadcost = x->nmvjointsadcost;
523 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
525 // adjust ref_mv to make sure it is within MV range
526 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
530 // Work out the start point for the search
531 bestsad = vfp->sdf(what->buf, what->stride,
532 get_buf_from_mv(in_what, ref_mv), in_what->stride,
533 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv,
534 mvjsadcost, mvsadcost, sad_per_bit);
536 // Search all possible scales up to the search param around the center point
537 // pick the scale of the point that is best as the starting scale of
538 // further steps around it.
539 if (do_init_search) {
542 for (t = 0; t <= s; ++t) {
544 if (check_bounds(x, br, bc, 1 << t)) {
545 for (i = 0; i < num_candidates[t]; i++) {
546 const MV this_mv = {br + candidates[t][i].row,
547 bc + candidates[t][i].col};
548 thissad = vfp->sdf(what->buf, what->stride,
549 get_buf_from_mv(in_what, &this_mv),
550 in_what->stride, bestsad);
554 for (i = 0; i < num_candidates[t]; i++) {
555 const MV this_mv = {br + candidates[t][i].row,
556 bc + candidates[t][i].col};
557 if (!is_mv_in(x, &this_mv))
559 thissad = vfp->sdf(what->buf, what->stride,
560 get_buf_from_mv(in_what, &this_mv),
561 in_what->stride, bestsad);
565 if (best_site == -1) {
572 if (best_init_s != -1) {
573 br += candidates[best_init_s][k].row;
574 bc += candidates[best_init_s][k].col;
578 // If the center point is still the best, just skip this and move to
579 // the refinement step.
580 if (best_init_s != -1) {
585 // No need to search all 6 points the 1st time if initial search was used
586 if (!do_init_search || s != best_init_s) {
587 if (check_bounds(x, br, bc, 1 << s)) {
588 for (i = 0; i < num_candidates[s]; i++) {
589 const MV this_mv = {br + candidates[s][i].row,
590 bc + candidates[s][i].col};
591 thissad = vfp->sdf(what->buf, what->stride,
592 get_buf_from_mv(in_what, &this_mv),
593 in_what->stride, bestsad);
597 for (i = 0; i < num_candidates[s]; i++) {
598 const MV this_mv = {br + candidates[s][i].row,
599 bc + candidates[s][i].col};
600 if (!is_mv_in(x, &this_mv))
602 thissad = vfp->sdf(what->buf, what->stride,
603 get_buf_from_mv(in_what, &this_mv),
604 in_what->stride, bestsad);
609 if (best_site == -1) {
612 br += candidates[s][best_site].row;
613 bc += candidates[s][best_site].col;
619 int next_chkpts_indices[PATTERN_CANDIDATES_REF];
621 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
622 next_chkpts_indices[1] = k;
623 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
625 if (check_bounds(x, br, bc, 1 << s)) {
626 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
627 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
628 bc + candidates[s][next_chkpts_indices[i]].col};
629 thissad = vfp->sdf(what->buf, what->stride,
630 get_buf_from_mv(in_what, &this_mv),
631 in_what->stride, bestsad);
635 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
636 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
637 bc + candidates[s][next_chkpts_indices[i]].col};
638 if (!is_mv_in(x, &this_mv))
640 thissad = vfp->sdf(what->buf, what->stride,
641 get_buf_from_mv(in_what, &this_mv),
642 in_what->stride, bestsad);
647 if (best_site != -1) {
648 k = next_chkpts_indices[best_site];
649 br += candidates[s][k].row;
650 bc += candidates[s][k].col;
652 } while (best_site != -1);
656 // Check 4 1-away neighbors if do_refine is true.
657 // For most well-designed schemes do_refine will not be necessary.
659 static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};
661 for (j = 0; j < 16; j++) {
663 if (check_bounds(x, br, bc, 1)) {
664 for (i = 0; i < 4; i++) {
665 const MV this_mv = {br + neighbors[i].row,
666 bc + neighbors[i].col};
667 thissad = vfp->sdf(what->buf, what->stride,
668 get_buf_from_mv(in_what, &this_mv),
669 in_what->stride, bestsad);
673 for (i = 0; i < 4; i++) {
674 const MV this_mv = {br + neighbors[i].row,
675 bc + neighbors[i].col};
676 if (!is_mv_in(x, &this_mv))
678 thissad = vfp->sdf(what->buf, what->stride,
679 get_buf_from_mv(in_what, &this_mv),
680 in_what->stride, bestsad);
685 if (best_site == -1) {
688 br += neighbors[best_site].row;
689 bc += neighbors[best_site].col;
// Returns the variance (vfp->vf) between the source block and the reference
// block addressed by the whole-sample vector best_mv. When use_mvcost is
// nonzero, mv_err_cost of best_mv scaled by 8, relative to center_mv, is
// added using x->nmvjointcost, x->mvcost and x->errorperbit.
// The squared-error output of vfp->vf is discarded (unused).
// NOTE(review): the use_mvcost parameter and the declaration of unused are
// on lines not visible in this excerpt.
700 int vp9_get_mvpred_var(const MACROBLOCK *x,
701 const MV *best_mv, const MV *center_mv,
702 const vp9_variance_fn_ptr_t *vfp,
704 const MACROBLOCKD *const xd = &x->e_mbd;
705 const struct buf_2d *const what = &x->plane[0].src;
706 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
707 const MV mv = {best_mv->row * 8, best_mv->col * 8};
710 return vfp->vf(what->buf, what->stride,
711 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
712 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
713 x->mvcost, x->errorperbit) : 0);
// Compound counterpart of vp9_get_mvpred_var(): calls vfp->svaf on the
// reference block addressed by the whole-sample vector best_mv, with both
// sub-sample offsets set to 0 and second_pred as the extra argument. When
// use_mvcost is nonzero, mv_err_cost of best_mv scaled by 8, relative to
// center_mv, is added.
// The squared-error output is discarded (unused).
// NOTE(review): the use_mvcost parameter and the declaration of unused are
// on lines not visible in this excerpt.
716 int vp9_get_mvpred_av_var(const MACROBLOCK *x,
717 const MV *best_mv, const MV *center_mv,
718 const uint8_t *second_pred,
719 const vp9_variance_fn_ptr_t *vfp,
721 const MACROBLOCKD *const xd = &x->e_mbd;
722 const struct buf_2d *const what = &x->plane[0].src;
723 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
724 const MV mv = {best_mv->row * 8, best_mv->col * 8};
727 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
728 what->buf, what->stride, &unused, second_pred) +
729 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
730 x->mvcost, x->errorperbit) : 0);
// Pattern search using a hexagon-shaped candidate set: forwards to
// vp9_pattern_search() with the tables below and do_refine fixed to 0.
// NOTE(review): part of the signature, the end of the last table row and one
// argument line of the call are not visible in this excerpt.
733 int vp9_hex_search(const MACROBLOCK *x,
738 const vp9_variance_fn_ptr_t *vfp,
740 const MV *center_mv, MV *best_mv) {
741 // First scale has 8-closest points, the rest have 6 points in hex shape
742 // at increasing scales
743 static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
744 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
746 // Note that the largest candidate step at each scale is 2^scale
747 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
748 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
749 {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
750 {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
751 {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
752 {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
753 {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
754 {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
755 {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
756 {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
757 {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
758 {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
761 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
762 do_init_search, 0, vfp, use_mvcost,
764 hex_num_candidates, hex_candidates);
// Pattern search using a diamond-shaped candidate set: forwards to
// vp9_pattern_search() with the tables below and do_refine fixed to 0.
// NOTE(review): most of the signature and one argument line of the call are
// not visible in this excerpt.
767 int vp9_bigdia_search(const MACROBLOCK *x,
772 const vp9_variance_fn_ptr_t *vfp,
776 // First scale has 4-closest points, the rest have 8 points in diamond
777 // shape at increasing scales
778 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
779 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
781 // Note that the largest candidate step at each scale is 2^scale
782 static const MV bigdia_candidates[MAX_PATTERN_SCALES]
783 [MAX_PATTERN_CANDIDATES] = {
784 {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
785 {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
786 {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
787 {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
788 {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
789 {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
790 {-16, 16}, {-32, 0}},
791 {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
792 {-32, 32}, {-64, 0}},
793 {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
794 {-64, 64}, {-128, 0}},
795 {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
796 {-128, 128}, {-256, 0}},
797 {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
798 {-256, 256}, {-512, 0}},
799 {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
800 {-512, 512}, {-1024, 0}},
802 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
803 do_init_search, 0, vfp, use_mvcost,
805 bigdia_num_candidates, bigdia_candidates);
// Pattern search using a square-shaped candidate set: forwards to
// vp9_pattern_search() with the tables below and do_refine fixed to 0.
// NOTE(review): most of the signature and one argument line of the call are
// not visible in this excerpt.
808 int vp9_square_search(const MACROBLOCK *x,
813 const vp9_variance_fn_ptr_t *vfp,
817 // All scales have 8 closest points in square shape
818 static const int square_num_candidates[MAX_PATTERN_SCALES] = {
819 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
821 // Note that the largest candidate step at each scale is 2^scale
822 static const MV square_candidates[MAX_PATTERN_SCALES]
823 [MAX_PATTERN_CANDIDATES] = {
824 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
825 {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
826 {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
827 {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
828 {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
829 {-16, 16}, {-16, 0}},
830 {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
831 {-32, 32}, {-32, 0}},
832 {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
833 {-64, 64}, {-64, 0}},
834 {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
835 {-128, 128}, {-128, 0}},
836 {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
837 {-256, 256}, {-256, 0}},
838 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
839 {-512, 512}, {-512, 0}},
840 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
841 {0, 1024}, {-1024, 1024}, {-1024, 0}},
843 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
844 do_init_search, 0, vfp, use_mvcost,
846 square_num_candidates, square_candidates);
// Restricted hex search: forwards to vp9_hex_search() with search_param
// raised to at least MAX_MVSEARCH_STEPS - 2.
// NOTE(review): most of the signature and the trailing call arguments are
// not visible in this excerpt.
849 int vp9_fast_hex_search(const MACROBLOCK *x,
853 int do_init_search, // must be zero for fast_hex
854 const vp9_variance_fn_ptr_t *vfp,
858 return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
859 sad_per_bit, do_init_search, vfp, use_mvcost,
// Restricted diamond search: forwards to vp9_bigdia_search() with
// search_param raised to at least MAX_MVSEARCH_STEPS - 2.
// NOTE(review): most of the signature and the trailing call arguments are
// not visible in this excerpt.
863 int vp9_fast_dia_search(const MACROBLOCK *x,
868 const vp9_variance_fn_ptr_t *vfp,
872 return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
873 sad_per_bit, do_init_search, vfp, use_mvcost,
// Exhaustive SAD search over a rectangle of +/- range around ref_mv,
// limited to x's mv limits.
//
// ref_mv is clamped in place first; best_mv starts at the clamped ref_mv and
// is finally offset by best_tr / best_tc. Columns are processed four at a
// time with fn_ptr->sdx4df while four remain in the row, otherwise one at a
// time with fn_ptr->sdf. The vector cost (mvsad_err_cost relative to
// center_mv >> 3) is added only to candidates whose raw SAD already beats
// bestsad.
// NOTE(review): the scalar tail loop runs for i < end_col - tc, i.e. over
// end_col - tc columns, while the enclosing loop treats end_col as
// inclusive; column end_col itself appears not to be evaluated on that
// path - confirm whether this is intended.
// NOTE(review): the declaration of range, the bookkeeping on improvement
// and the return value are on lines not visible in this excerpt.
879 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
880 int search_param, int sad_per_bit, int *num00,
881 const vp9_variance_fn_ptr_t *fn_ptr,
882 const MV *center_mv) {
883 const MACROBLOCKD *const xd = &x->e_mbd;
884 const uint8_t *what = x->plane[0].src.buf;
885 const int what_stride = x->plane[0].src.stride;
886 const uint8_t *in_what;
887 const int in_what_stride = xd->plane[0].pre[0].stride;
889 unsigned int bestsad = INT_MAX;
890 int ref_row, ref_col;
892 unsigned int thissad;
893 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
895 const int *mvjsadcost = x->nmvjointsadcost;
896 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
903 int start_col, end_col;
904 int start_row, end_row;
907 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
908 ref_row = ref_mv->row;
909 ref_col = ref_mv->col;
911 best_mv->row = ref_row;
912 best_mv->col = ref_col;
914 // Work out the start point for the search
915 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
917 // Check the starting position
918 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
919 + mvsad_err_cost(best_mv, &fcenter_mv,
920 mvjsadcost, mvsadcost, sad_per_bit);
922 start_row = MAX(-range, x->mv_row_min - ref_row);
923 start_col = MAX(-range, x->mv_col_min - ref_col);
924 end_row = MIN(range, x->mv_row_max - ref_row);
925 end_col = MIN(range, x->mv_col_max - ref_col);
927 for (tr = start_row; tr <= end_row; ++tr) {
928 for (tc = start_col; tc <= end_col; tc += 4) {
929 if ((tc + 3) <= end_col) {
930 unsigned int sad_array[4];
931 unsigned char const *addr_ref[4];
932 for (i = 0; i < 4; ++i)
933 addr_ref[i] = in_what + tr * in_what_stride + tc + i;
935 fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array);
937 for (i = 0; i < 4; ++i) {
938 if (sad_array[i] < bestsad) {
939 const MV this_mv = {ref_row + tr, ref_col + tc + i};
940 thissad = sad_array[i] +
941 mvsad_err_cost(&this_mv, &fcenter_mv,
942 mvjsadcost, mvsadcost, sad_per_bit);
943 if (thissad < bestsad) {
951 for (i = 0; i < end_col - tc; ++i) {
952 const uint8_t *check_here = in_what + tr * in_what_stride + tc + i;
953 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
956 if (thissad < bestsad) {
957 const MV this_mv = {ref_row + tr, ref_col + tc + i};
958 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
959 mvjsadcost, mvsadcost, sad_per_bit);
961 if (thissad < bestsad) {
971 best_mv->row += best_tr;
972 best_mv->col += best_tc;
// Step search over the site table x->ss (built by one of the
// vp9_init*motion_compensation functions), starting at ref_mv.
//
// ref_mv is clamped in place first. search_param selects how many leading
// steps of the table are skipped: ss points at entry
// search_param * x->searches_per_step and tot_steps is reduced accordingly.
// For every step each site is tested with is_mv_in(); the vector cost
// (mvsad_err_cost relative to center_mv >> 3) is added only when the raw SAD
// already beats best_sad. When a step found a better site, best_mv and
// best_address move there. The NEW_DIAMOND_SEARCH section additionally
// re-tries the same site offset from the new position.
// NOTE(review): the initialisation of best_mv, the site index i, the
// bookkeeping of best_site, the num00 handling and the return value are on
// lines not visible in this excerpt.
976 int vp9_diamond_search_sad_c(const MACROBLOCK *x,
977 MV *ref_mv, MV *best_mv,
978 int search_param, int sad_per_bit, int *num00,
979 const vp9_variance_fn_ptr_t *fn_ptr,
980 const MV *center_mv) {
981 const MACROBLOCKD *const xd = &x->e_mbd;
982 const struct buf_2d *const what = &x->plane[0].src;
983 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
984 // search_param determines the length of the initial step and hence the number
986 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
987 // (MAX_FIRST_STEP/4) pel... etc.
988 const search_site *const ss = &x->ss[search_param * x->searches_per_step];
989 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
990 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
991 const int *mvjsadcost = x->nmvjointsadcost;
992 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
993 const uint8_t *best_address, *in_what_ref;
994 int best_sad = INT_MAX;
999 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1000 in_what_ref = get_buf_from_mv(in_what, ref_mv);
1001 best_address = in_what_ref;
1005 // Check the starting position
1006 best_sad = fn_ptr->sdf(what->buf, what->stride,
1007 best_address, in_what->stride, 0x7fffffff) +
1008 mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
1012 for (step = 0; step < tot_steps; step++) {
1013 for (j = 0; j < x->searches_per_step; j++) {
1014 const MV mv = {best_mv->row + ss[i].mv.row,
1015 best_mv->col + ss[i].mv.col};
1016 if (is_mv_in(x, &mv)) {
1017 int sad = fn_ptr->sdf(what->buf, what->stride,
1018 best_address + ss[i].offset, in_what->stride,
1020 if (sad < best_sad) {
1021 sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
1023 if (sad < best_sad) {
1033 if (best_site != last_site) {
1034 best_mv->row += ss[best_site].mv.row;
1035 best_mv->col += ss[best_site].mv.col;
1036 best_address += ss[best_site].offset;
1037 last_site = best_site;
1038 #if defined(NEW_DIAMOND_SEARCH)
1040 const MV this_mv = {best_mv->row + ss[best_site].mv.row,
1041 best_mv->col + ss[best_site].mv.col};
1042 if (is_mv_in(x, &this_mv)) {
1043 int sad = fn_ptr->sdf(what->buf, what->stride,
1044 best_address + ss[best_site].offset,
1045 in_what->stride, best_sad);
1046 if (sad < best_sad) {
1047 sad += mvsad_err_cost(&this_mv, &fcenter_mv,
1048 mvjsadcost, mvsadcost, sad_per_bit);
1049 if (sad < best_sad) {
1051 best_mv->row += ss[best_site].mv.row;
1052 best_mv->col += ss[best_site].mv.col;
1053 best_address += ss[best_site].offset;
1061 } else if (best_address == in_what_ref) {
// Variant of the diamond step search that evaluates four sites per call to
// fn_ptr->sdx4df when all of them are inside the mv limits.
//
// ref_mv is clamped in place first and best_mv starts at the clamped value.
// For each step, all_in is derived from the first four sites of the step:
// ss[i] is tested against the row minimum, ss[i + 1] against the row
// maximum, ss[i + 2] against the column minimum and ss[i + 3] against the
// column maximum, using strict comparisons. This matches the site order
// {-len, 0}, {len, 0}, {0, -len}, {0, len} produced by the
// vp9_init*motion_compensation functions. When all_in holds, sites are
// scored four at a time without per-site range checks; otherwise each site
// is filtered with is_mv_in() and scored with fn_ptr->sdf. In both paths the
// vector cost is added only when the raw SAD already beats bestsad.
// NOTE(review): the four-at-a-time loop steps j by 4, so it presumably
// requires x->searches_per_step to be a multiple of 4 - confirm.
// NOTE(review): several local declarations, the bookkeeping of best_site,
// the num00 handling and the return value are on lines not visible in this
// excerpt.
1068 int vp9_diamond_search_sadx4(const MACROBLOCK *x,
1069 MV *ref_mv, MV *best_mv, int search_param,
1070 int sad_per_bit, int *num00,
1071 const vp9_variance_fn_ptr_t *fn_ptr,
1072 const MV *center_mv) {
1075 const MACROBLOCKD *const xd = &x->e_mbd;
1076 uint8_t *what = x->plane[0].src.buf;
1077 const int what_stride = x->plane[0].src.stride;
1078 const uint8_t *in_what;
1079 const int in_what_stride = xd->plane[0].pre[0].stride;
1080 const uint8_t *best_address;
1082 unsigned int bestsad = INT_MAX;
1089 // search_param determines the length of the initial step and hence the number
1091 // 0 = initial step (MAX_FIRST_STEP) pel
1092 // 1 = (MAX_FIRST_STEP/2) pel,
1093 // 2 = (MAX_FIRST_STEP/4) pel...
1094 const search_site *ss = &x->ss[search_param * x->searches_per_step];
1095 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1097 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1099 const int *mvjsadcost = x->nmvjointsadcost;
1100 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1102 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1103 ref_row = ref_mv->row;
1104 ref_col = ref_mv->col;
1106 best_mv->row = ref_row;
1107 best_mv->col = ref_col;
1109 // Work out the start point for the search
1110 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1111 best_address = in_what;
1113 // Check the starting position
1114 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
1115 + mvsad_err_cost(best_mv, &fcenter_mv,
1116 mvjsadcost, mvsadcost, sad_per_bit);
1120 for (step = 0; step < tot_steps; step++) {
1123 // All_in is true if every one of the points we are checking are within
1124 // the bounds of the image.
1125 all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
1126 all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
1127 all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
1128 all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
1130 // If all the pixels are within the bounds we don't check whether the
1131 // search point is valid in this loop, otherwise we check each point
1134 unsigned int sad_array[4];
1136 for (j = 0; j < x->searches_per_step; j += 4) {
1137 unsigned char const *block_offset[4];
1139 for (t = 0; t < 4; t++)
1140 block_offset[t] = ss[i + t].offset + best_address;
1142 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1145 for (t = 0; t < 4; t++, i++) {
1146 if (sad_array[t] < bestsad) {
1147 const MV this_mv = {best_mv->row + ss[i].mv.row,
1148 best_mv->col + ss[i].mv.col};
1149 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1150 mvjsadcost, mvsadcost, sad_per_bit);
1152 if (sad_array[t] < bestsad) {
1153 bestsad = sad_array[t];
1160 for (j = 0; j < x->searches_per_step; j++) {
1161 // Trap illegal vectors
1162 const MV this_mv = {best_mv->row + ss[i].mv.row,
1163 best_mv->col + ss[i].mv.col};
1165 if (is_mv_in(x, &this_mv)) {
1166 const uint8_t *const check_here = ss[i].offset + best_address;
1167 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
1168 in_what_stride, bestsad);
1170 if (thissad < bestsad) {
1171 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1172 mvjsadcost, mvsadcost, sad_per_bit);
1174 if (thissad < bestsad) {
1183 if (best_site != last_site) {
1184 best_mv->row += ss[best_site].mv.row;
1185 best_mv->col += ss[best_site].mv.col;
1186 best_address += ss[best_site].offset;
1187 last_site = best_site;
1188 #if defined(NEW_DIAMOND_SEARCH)
1190 const MV this_mv = {best_mv->row + ss[best_site].mv.row,
1191 best_mv->col + ss[best_site].mv.col};
1192 if (is_mv_in(x, &this_mv)) {
1193 const uint8_t *const check_here = ss[best_site].offset + best_address;
1194 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
1195 in_what_stride, bestsad);
1196 if (thissad < bestsad) {
1197 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1198 mvjsadcost, mvsadcost, sad_per_bit);
1199 if (thissad < bestsad) {
1201 best_mv->row += ss[best_site].mv.row;
1202 best_mv->col += ss[best_site].mv.col;
1203 best_address += ss[best_site].offset;
1211 } else if (best_address == in_what) {
1218 /* do_refine: If the last step (1-away) of the n-step search doesn't pick the
1219 center point as the best match, do a final 1-away diamond refining search. */
// Full-pixel motion search driver built on the search callbacks stored in cpi.
// It runs cpi->diamond_search_sad() once at step_param, repeats it at
// step_param + n while n < further_steps, and finally calls
// cpi->refining_search_sad() on a copy of *dst_mv. Every search result that
// is below INT_MAX is replaced by vp9_get_mvpred_var() before it is compared
// against the best error so far (bestsme).
// NOTE(review): do_refine presumably gates the final refining search and is
// cleared by the n / num00 checks below -- confirm against the full body.
1222 int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
1223 MV *mvp_full, int step_param,
1224 int sadpb, int further_steps, int do_refine,
1225 const vp9_variance_fn_ptr_t *fn_ptr,
1226 const MV *ref_mv, MV *dst_mv) {
1228 int thissme, n, num00 = 0;
// First diamond search at step_param; the callee also writes n through &n.
1229 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1230 step_param, sadpb, &n,
// An INT_MAX result is kept as is; any other result is re-scored with
// vp9_get_mvpred_var() so that later comparisons use that measure.
1232 if (bestsme < INT_MAX)
1233 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1236 // If there won't be more n-step search, check to see if refining search is
1238 if (n > further_steps)
// Further diamond searches, each one started at step_param + n.
1241 while (n < further_steps) {
1247 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1248 step_param + n, sadpb, &num00,
// Same re-scoring rule as for the first search.
1250 if (thissme < INT_MAX)
1251 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1253 // check to see if refining search is needed.
1254 if (num00 > further_steps - n)
1257 if (thissme < bestsme) {
1264 // final 1-away diamond refining search
// The refinement works on a local copy of *dst_mv and is given a
// search_range of 8.
1266 const int search_range = 8;
1267 MV best_mv = *dst_mv;
1268 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
1270 if (thissme < INT_MAX)
1271 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
1272 if (thissme < bestsme) {
// Exhaustive full-pixel search that evaluates one candidate per sdf() call.
// The window is ref_mv +/- distance in rows and columns, clipped to
// x->mv_row_min/max and x->mv_col_min/max. Each candidate is scored as
// fn_ptr->sdf() plus mvsad_err_cost() measured against center_mv >> 3, and
// the running minimum is kept in best_sad.
1280 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
1281 int sad_per_bit, int distance,
1282 const vp9_variance_fn_ptr_t *fn_ptr,
1283 const MV *center_mv, MV *best_mv) {
1285 const MACROBLOCKD *const xd = &x->e_mbd;
// what is the source block of plane 0, in_what is plane 0's pre[0] buffer.
1286 const struct buf_2d *const what = &x->plane[0].src;
1287 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
// Search window: ref_mv +/- distance, clipped to the limits stored in x.
1288 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
1289 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
1290 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
1291 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
1292 const int *mvjsadcost = x->nmvjointsadcost;
1293 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
// center_mv with both components shifted right by 3 (presumably converting
// 1/8-pel units to full-pel units -- confirm).
1294 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
// Seed best_sad with the cost at ref_mv itself. The last sdf() argument is
// 0x7fffffff here and the current best_sad inside the loop (presumably an
// early-out bound -- confirm against the sdf implementations).
1295 int best_sad = fn_ptr->sdf(what->buf, what->stride,
1296 get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
1297 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
// Both loops stop before row_max / col_max, i.e. the upper bounds are
// exclusive.
1300 for (r = row_min; r < row_max; ++r) {
1301 for (c = col_min; c < col_max; ++c) {
1302 const MV mv = {r, c};
1303 const int sad = fn_ptr->sdf(what->buf, what->stride,
1304 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
1305 mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
1308 if (sad < best_sad) {
// Exhaustive full-pixel search that scores three horizontally adjacent
// candidates per call through fn_ptr->sdx3f when that pointer is non-NULL,
// and falls back to one-at-a-time fn_ptr->sdf() for the remaining columns of
// each row. The window is ref_mv +/- distance clipped to the limits stored
// in x. best_mv is initialised to ref_mv and bestsad to the cost at that
// position.
1317 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
1318 int sad_per_bit, int distance,
1319 const vp9_variance_fn_ptr_t *fn_ptr,
1320 const MV *center_mv, MV *best_mv) {
1321 const MACROBLOCKD *const xd = &x->e_mbd;
// what is the source block of plane 0, in_what is plane 0's pre[0] buffer.
1322 const uint8_t *const what = x->plane[0].src.buf;
1323 const int what_stride = x->plane[0].src.stride;
1324 const uint8_t *const in_what = xd->plane[0].pre[0].buf;
1325 const int in_what_stride = xd->plane[0].pre[0].stride;
// Placeholder value; overwritten with the cost at ref_mv further down.
1327 unsigned int bestsad = INT_MAX;
1329 unsigned int thissad;
1330 int ref_row = ref_mv->row;
1331 int ref_col = ref_mv->col;
1333 // Apply further limits to prevent us looking using vectors that stretch
1334 // beyond the UMV border
1335 const int row_min = MAX(ref_row - distance, x->mv_row_min);
1336 const int row_max = MIN(ref_row + distance, x->mv_row_max);
1337 const int col_min = MAX(ref_col - distance, x->mv_col_min);
1338 const int col_max = MIN(ref_col + distance, x->mv_col_max);
// Receives the three SADs written by one sdx3f() call.
1339 unsigned int sad_array[3];
// center_mv with both components shifted right by 3 (presumably converting
// 1/8-pel units to full-pel units -- confirm).
1340 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1341 const int *mvjsadcost = x->nmvjointsadcost;
1342 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1344 // Work out the mid point for the search
1345 const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
1347 best_mv->row = ref_row;
1348 best_mv->col = ref_col;
1350 // Baseline value at the centre
1351 bestsad = fn_ptr->sdf(what, what_stride,
1352 bestaddress, in_what_stride, 0x7fffffff)
1353 + mvsad_err_cost(best_mv, &fcenter_mv,
1354 mvjsadcost, mvsadcost, sad_per_bit);
// Rows run up to but not including row_max; check_here starts at column
// col_min of the current row.
1356 for (r = row_min; r < row_max; r++) {
1357 const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
// Batched path: used only while three columns (c .. c + 2) still lie below
// col_max and a three-way SAD function is available.
1361 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
1364 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1366 for (i = 0; i < 3; i++) {
1367 thissad = sad_array[i];
// Two-stage test: the motion-vector cost is added only when the raw SAD
// already beats bestsad, then the total is compared again.
1369 if (thissad < bestsad) {
1371 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1372 mvjsadcost, mvsadcost, sad_per_bit);
1374 if (thissad < bestsad) {
// Remaining columns of the row, one sdf() call per candidate.
1385 while (c < col_max) {
1386 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
1389 if (thissad < bestsad) {
1391 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1392 mvjsadcost, mvsadcost, sad_per_bit);
1394 if (thissad < bestsad) {
// Exhaustive full-pixel search with three tiers per row: eight candidates
// per fn_ptr->sdx8f() call while eight columns remain, then three per
// fn_ptr->sdx3f() call (when that pointer is non-NULL), then single
// fn_ptr->sdf() calls for whatever is left. The window is ref_mv +/- distance
// clipped to the limits stored in x. best_mv is initialised to ref_mv and
// bestsad to the cost at that position.
// NOTE(review): sdx8f is called without the NULL check that sdx3f gets --
// confirm every caller supplies it.
1408 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
1409 int sad_per_bit, int distance,
1410 const vp9_variance_fn_ptr_t *fn_ptr,
1411 const MV *center_mv, MV *best_mv) {
1412 const MACROBLOCKD *const xd = &x->e_mbd;
// what is the source block of plane 0, in_what is plane 0's pre[0] buffer.
1413 const uint8_t *const what = x->plane[0].src.buf;
1414 const int what_stride = x->plane[0].src.stride;
1415 const uint8_t *const in_what = xd->plane[0].pre[0].buf;
1416 const int in_what_stride = xd->plane[0].pre[0].stride;
// Placeholder value; overwritten with the cost at ref_mv further down.
1418 unsigned int bestsad = INT_MAX;
1420 int ref_row = ref_mv->row;
1421 int ref_col = ref_mv->col;
1423 // Apply further limits to prevent us looking using vectors that stretch
1424 // beyond the UMV border
1425 const int row_min = MAX(ref_row - distance, x->mv_row_min);
1426 const int row_max = MIN(ref_row + distance, x->mv_row_max);
1427 const int col_min = MAX(ref_col - distance, x->mv_col_min);
1428 const int col_max = MIN(ref_col + distance, x->mv_col_max);
// Output buffers: eight uint32_t results for sdx8f (declared through
// DECLARE_ALIGNED_ARRAY with 16, presumably the byte alignment -- confirm)
// and three results for sdx3f.
1429 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
1430 unsigned int sad_array[3];
// center_mv with both components shifted right by 3 (presumably converting
// 1/8-pel units to full-pel units -- confirm).
1431 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1433 const int *mvjsadcost = x->nmvjointsadcost;
1434 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1436 // Work out the mid point for the search
1437 const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
1439 best_mv->row = ref_row;
1440 best_mv->col = ref_col;
1442 // Baseline value at the center
1443 bestsad = fn_ptr->sdf(what, what_stride,
1444 bestaddress, in_what_stride, 0x7fffffff)
1445 + mvsad_err_cost(best_mv, &fcenter_mv,
1446 mvjsadcost, mvsadcost, sad_per_bit);
// Rows run up to but not including row_max; check_here starts at column
// col_min of the current row.
1448 for (r = row_min; r < row_max; r++) {
1449 const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
// Tier 1: eight columns (c .. c + 7) per call while they all lie below
// col_max.
1453 while ((c + 7) < col_max) {
1456 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1458 for (i = 0; i < 8; i++) {
1459 unsigned int thissad = (unsigned int)sad_array8[i];
// Two-stage test: the motion-vector cost is added only when the raw SAD
// already beats bestsad, then the total is compared again.
1461 if (thissad < bestsad) {
1463 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1464 mvjsadcost, mvsadcost, sad_per_bit);
1466 if (thissad < bestsad) {
// Tier 2: three columns per call, only when a three-way SAD function exists.
1478 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
1481 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1483 for (i = 0; i < 3; i++) {
1484 unsigned int thissad = sad_array[i];
1486 if (thissad < bestsad) {
1488 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1489 mvjsadcost, mvsadcost, sad_per_bit);
1491 if (thissad < bestsad) {
// Tier 3: remaining columns of the row, one sdf() call per candidate with
// the current bestsad passed as the last argument.
1503 while (c < col_max) {
1504 unsigned int thissad = fn_ptr->sdf(what, what_stride,
1505 check_here, in_what_stride, bestsad);
1507 if (thissad < bestsad) {
1509 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1510 mvjsadcost, mvsadcost, sad_per_bit);
1512 if (thissad < bestsad) {
// Iterative 4-neighbour refinement of *ref_mv. For at most search_range
// rounds, the four positions one step away (row - 1, col - 1, col + 1,
// row + 1) are scored with fn_ptr->sdf() plus mvsad_err_cost(); ref_mv is
// then moved in place to the best neighbour. best_site == -1 marks a round
// in which no neighbour was chosen.
// NOTE(review): a round with best_site == -1 presumably ends the search --
// confirm against the full body.
1526 int vp9_refining_search_sad_c(const MACROBLOCK *x,
1527 MV *ref_mv, int error_per_bit,
1529 const vp9_variance_fn_ptr_t *fn_ptr,
1530 const MV *center_mv) {
// {row, col} offsets of the four candidates around the current position.
1531 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
1532 const MACROBLOCKD *const xd = &x->e_mbd;
// what is the source block of plane 0, in_what is plane 0's pre[0] buffer.
1533 const struct buf_2d *const what = &x->plane[0].src;
1534 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
// center_mv with both components shifted right by 3 (presumably converting
// 1/8-pel units to full-pel units -- confirm).
1535 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1536 const int *mvjsadcost = x->nmvjointsadcost;
1537 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
// Starting cost: SAD at ref_mv plus its motion-vector cost.
1539 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
1540 get_buf_from_mv(in_what, ref_mv),
1541 in_what->stride, 0x7fffffff) +
1542 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1545 for (i = 0; i < search_range; i++) {
1548 for (j = 0; j < 4; j++) {
1549 const MV mv = {ref_mv->row + neighbors[j].row,
1550 ref_mv->col + neighbors[j].col};
// Candidates rejected by is_mv_in() are not evaluated at all.
1551 if (is_mv_in(x, &mv)) {
1552 unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
1553 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
// Two-stage test: the motion-vector cost is added only when the raw SAD
// already beats best_sad, then the total is compared again.
1554 if (sad < best_sad) {
1555 sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
1557 if (sad < best_sad) {
1565 if (best_site == -1) {
// Step to the winning neighbour; the caller's ref_mv is modified in place.
1568 ref_mv->row += neighbors[best_site].row;
1569 ref_mv->col += neighbors[best_site].col;
// Iterative 4-neighbour refinement of *ref_mv with a batched fast path.
// For at most search_range rounds, the four positions one step away are
// scored. One code path gets all four SADs from a single fn_ptr->sdx4df()
// call; the other checks each neighbour with is_mv_in() and scores it with
// fn_ptr->sdf(). ref_mv and best_address are then moved to the best
// neighbour. best_site == -1 marks a round in which no neighbour was chosen.
// NOTE(review): all_in presumably selects between the two paths, and a round
// with best_site == -1 presumably ends the search -- confirm against the
// full body.
1575 int vp9_refining_search_sadx4(const MACROBLOCK *x,
1576 MV *ref_mv, int error_per_bit,
1578 const vp9_variance_fn_ptr_t *fn_ptr,
1579 const MV *center_mv) {
1580 const MACROBLOCKD *const xd = &x->e_mbd;
// {row, col} offsets of the four candidates around the current position.
1581 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
// what is the source block of plane 0, in_what is plane 0's pre[0] buffer.
1582 const struct buf_2d *const what = &x->plane[0].src;
1583 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
// center_mv with both components shifted right by 3 (presumably converting
// 1/8-pel units to full-pel units -- confirm).
1584 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1585 const int *mvjsadcost = x->nmvjointsadcost;
1586 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
// best_address tracks the in_what pixel that corresponds to *ref_mv.
1587 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
// Starting cost: SAD at ref_mv plus its motion-vector cost.
1588 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
1589 in_what->stride, 0x7fffffff) +
1590 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1593 for (i = 0; i < search_range; i++) {
// Non-zero only when all four neighbours lie strictly inside the mv limits
// stored in x. The comparisons each yield 0 or 1, so bitwise & combines
// them without short-circuit evaluation.
1595 const int all_in = ((ref_mv->row - 1) > x->mv_row_min) &
1596 ((ref_mv->row + 1) < x->mv_row_max) &
1597 ((ref_mv->col - 1) > x->mv_col_min) &
1598 ((ref_mv->col + 1) < x->mv_col_max);
// Batched path: one address per neighbour, starting one row above
// (- stride) and ending one row below (+ stride); sdx4df() writes the four
// SADs to sads[].
1601 unsigned int sads[4];
1602 const uint8_t *const positions[4] = {
1603 best_address - in_what->stride,
1606 best_address + in_what->stride
1609 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
1611 for (j = 0; j < 4; ++j) {
// Two-stage test: the motion-vector cost is added only when the raw SAD
// already beats best_sad, then the total is compared again.
1612 if (sads[j] < best_sad) {
1613 const MV mv = {ref_mv->row + neighbors[j].row,
1614 ref_mv->col + neighbors[j].col};
1615 sads[j] += mvsad_err_cost(&mv, &fcenter_mv,
1616 mvjsadcost, mvsadcost, error_per_bit);
1618 if (sads[j] < best_sad) {
// Per-neighbour path: each candidate is range-checked with is_mv_in()
// before it is scored.
1625 for (j = 0; j < 4; ++j) {
1626 const MV mv = {ref_mv->row + neighbors[j].row,
1627 ref_mv->col + neighbors[j].col};
1629 if (is_mv_in(x, &mv)) {
1630 unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
1631 get_buf_from_mv(in_what, &mv),
1632 in_what->stride, best_sad);
1633 if (sad < best_sad) {
1634 sad += mvsad_err_cost(&mv, &fcenter_mv,
1635 mvjsadcost, mvsadcost, error_per_bit);
1637 if (sad < best_sad) {
1646 if (best_site == -1) {
// Step to the winning neighbour and recompute the matching buffer address;
// the caller's ref_mv is modified in place.
1649 ref_mv->row += neighbors[best_site].row;
1650 ref_mv->col += neighbors[best_site].col;
1651 best_address = get_buf_from_mv(in_what, ref_mv);
1658 // This function is called when we do joint motion search in comp_inter_inter
// Iterative 8-neighbour refinement of *ref_mv. For at most search_range
// rounds, the eight positions one step away (four edge neighbours followed
// by four diagonal ones) are scored with fn_ptr->sdaf(), which also takes
// second_pred, plus mvsad_err_cost(); ref_mv is then moved in place to the
// best neighbour. best_site == -1 marks a round in which no neighbour was
// chosen.
// NOTE(review): a round with best_site == -1 presumably ends the search --
// confirm against the full body.
1660 int vp9_refining_search_8p_c(const MACROBLOCK *x,
1661 MV *ref_mv, int error_per_bit,
1663 const vp9_variance_fn_ptr_t *fn_ptr,
1664 const MV *center_mv,
1665 const uint8_t *second_pred, int w, int h) {
// {row, col} offsets: the first four are the edge neighbours, the last four
// the diagonals.
1666 const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
1667 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
1668 const MACROBLOCKD *const xd = &x->e_mbd;
// what is the source block of plane 0, in_what is plane 0's pre[0] buffer.
1669 const struct buf_2d *const what = &x->plane[0].src;
1670 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
// center_mv with both components shifted right by 3 (presumably converting
// 1/8-pel units to full-pel units -- confirm).
1671 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1672 const int *mvjsadcost = x->nmvjointsadcost;
1673 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
// Starting cost: sdaf() at ref_mv (with second_pred) plus its motion-vector
// cost.
1674 unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
1675 get_buf_from_mv(in_what, ref_mv), in_what->stride,
1676 second_pred, 0x7fffffff) +
1677 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1680 for (i = 0; i < search_range; ++i) {
1683 for (j = 0; j < 8; ++j) {
1684 const MV mv = {ref_mv->row + neighbors[j].row,
1685 ref_mv->col + neighbors[j].col};
// Candidates rejected by is_mv_in() are not evaluated at all.
1687 if (is_mv_in(x, &mv)) {
1688 unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
1689 get_buf_from_mv(in_what, &mv), in_what->stride,
1690 second_pred, best_sad);
// Two-stage test: the motion-vector cost is added only when the raw value
// already beats best_sad, then the total is compared again.
1691 if (sad < best_sad) {
1692 sad += mvsad_err_cost(&mv, &fcenter_mv,
1693 mvjsadcost, mvsadcost, error_per_bit);
1694 if (sad < best_sad) {
1702 if (best_site == -1) {
// Step to the winning neighbour; the caller's ref_mv is modified in place.
1705 ref_mv->row += neighbors[best_site].row;
1706 ref_mv->col += neighbors[best_site].col;