return p;
}
-#define SP(x) (((x) & 7) << 1)
-unsigned int vp9_sad3x16_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride) {
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16);
-}
-unsigned int vp9_sad16x3_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride) {
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3);
-}
-
-
-unsigned int vp9_variance2x16_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- int sum;
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, sse, &sum);
- return (*sse - (((unsigned int)sum * sum) >> 5));
-}
-
-unsigned int vp9_variance16x2_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- int sum;
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, sse, &sum);
- return (*sse - (((unsigned int)sum * sum) >> 5));
-}
-
-unsigned int vp9_sub_pixel_variance16x2_c(const uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const uint8_t *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- uint16_t FData3[16 * 3]; // Temp data buffer used in filtering
- uint8_t temp2[2 * 16];
- const int16_t *HFilter, *VFilter;
-
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
-
- var_filter_block2d_bil_first_pass(src_ptr, FData3,
- src_pixels_per_line, 1, 3, 16, HFilter);
- var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter);
-
- return vp9_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_variance2x16_c(const uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const uint8_t *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- uint16_t FData3[2 * 17]; // Temp data buffer used in filtering
- uint8_t temp2[2 * 16];
- const int16_t *HFilter, *VFilter;
-
- HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
- VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
-
- var_filter_block2d_bil_first_pass(src_ptr, FData3,
- src_pixels_per_line, 1, 17, 2, HFilter);
- var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter);
-
- return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse);
-}
-
-#if CONFIG_USESELECTREFMV
-/* check a list of motion vectors by sad score using a number rows of pixels
- * above and a number cols of pixels in the left to select the one with best
- * score to use as ref motion vector
- */
-
-void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
- uint8_t *ref_y_buffer,
- int ref_y_stride,
- int_mv *mvlist,
- int_mv *nearest,
- int_mv *near) {
- int i, j;
- uint8_t *above_src;
- uint8_t *above_ref;
-#if !CONFIG_ABOVESPREFMV
- uint8_t *left_src;
- uint8_t *left_ref;
-#endif
- unsigned int score;
- unsigned int sse;
- unsigned int ref_scores[MAX_MV_REF_CANDIDATES] = {0};
- int_mv sorted_mvs[MAX_MV_REF_CANDIDATES];
- int zero_seen = 0;
-
- if (ref_y_buffer) {
-
- // Default all to 0,0 if nothing else available
- nearest->as_int = near->as_int = 0;
- vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs));
-
- above_src = xd->dst.y_buffer - xd->dst.y_stride * 2;
- above_ref = ref_y_buffer - ref_y_stride * 2;
-#if CONFIG_ABOVESPREFMV
- above_src -= 4;
- above_ref -= 4;
-#else
- left_src = xd->dst.y_buffer - 2;
- left_ref = ref_y_buffer - 2;
-#endif
-
- // Limit search to the predicted best few candidates
- for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
- int_mv this_mv;
- int offset = 0;
- int row_offset, col_offset;
-
- this_mv.as_int = mvlist[i].as_int;
-
- // If we see a 0,0 vector for a second time we have reached the end of
- // the list of valid candidate vectors.
- if (!this_mv.as_int && zero_seen)
- break;
-
- zero_seen = zero_seen || !this_mv.as_int;
-
-#if !CONFIG_ABOVESPREFMV
- clamp_mv(&this_mv,
- xd->mb_to_left_edge - LEFT_TOP_MARGIN + 24,
- xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
- xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24,
- xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
-#else
- clamp_mv(&this_mv,
- xd->mb_to_left_edge - LEFT_TOP_MARGIN + 32,
- xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
- xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24,
- xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
-#endif
-
- row_offset = this_mv.as_mv.row >> 3;
- col_offset = this_mv.as_mv.col >> 3;
- offset = ref_y_stride * row_offset + col_offset;
- score = 0;
-#if !CONFIG_ABOVESPREFMV
- if (xd->up_available) {
-#else
- if (xd->up_available && xd->left_available) {
-#endif
- const int bwl = mb_width_log2(xd->mode_info_context->mbmi.sb_type);
- vp9_sub_pixel_variance16x2(above_ref + offset, ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- above_src, xd->dst.y_stride, &sse);
- score += sse;
- if (bwl >= 1) {
- vp9_sub_pixel_variance16x2(above_ref + offset + 16,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- above_src + 16, xd->dst.y_stride, &sse);
- score += sse;
- }
- if (bwl >= 2) {
- vp9_sub_pixel_variance16x2(above_ref + offset + 32,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- above_src + 32, xd->dst.y_stride, &sse);
- score += sse;
- vp9_sub_pixel_variance16x2(above_ref + offset + 48,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- above_src + 48, xd->dst.y_stride, &sse);
- score += sse;
- }
- }
-#if !CONFIG_ABOVESPREFMV
- if (xd->left_available) {
- const int bhl = mb_height_log2(xd->mode_info_context->mbmi.sb_type);
- vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- left_src, xd->dst.y_stride, &sse);
- score += sse;
- if (bhl >= 1) {
- vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- left_src + xd->dst.y_stride * 16,
- xd->dst.y_stride, &sse);
- score += sse;
- }
- if (bhl >= 2) {
- vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 32,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- left_src + xd->dst.y_stride * 32,
- xd->dst.y_stride, &sse);
- score += sse;
- vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 48,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- left_src + xd->dst.y_stride * 48,
- xd->dst.y_stride, &sse);
- score += sse;
- }
- }
-#endif
- // Add the entry to our list and then resort the list on score.
- ref_scores[i] = score;
- sorted_mvs[i].as_int = this_mv.as_int;
- j = i;
- while (j > 0) {
- if (ref_scores[j] < ref_scores[j-1]) {
- ref_scores[j] = ref_scores[j-1];
- sorted_mvs[j].as_int = sorted_mvs[j-1].as_int;
- ref_scores[j-1] = score;
- sorted_mvs[j-1].as_int = this_mv.as_int;
- j--;
- } else {
- break;
- }
- }
- }
- } else {
- vpx_memcpy(sorted_mvs, mvlist, sizeof(sorted_mvs));
- }
-
- // Make sure all the candidates are properly clamped etc
- for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
- lower_mv_precision(&sorted_mvs[i], xd->allow_high_precision_mv);
- clamp_mv2(&sorted_mvs[i], xd);
- }
-
- // Nearest may be a 0,0 or non zero vector and now matches the chosen
- // "best reference". This has advantages when it is used as part of a
- // compound predictor as it means a non zero vector can be paired using
- // this mode with a 0 vector. The Near vector is still forced to be a
- // non zero candidate if one is avaialble.
- nearest->as_int = sorted_mvs[0].as_int;
- if ( sorted_mvs[1].as_int ) {
- near->as_int = sorted_mvs[1].as_int;
- } else {
- near->as_int = sorted_mvs[2].as_int;
- }
-
- // Copy back the re-ordered mv list
- vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs));
-}
-#else
void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
uint8_t *ref_y_buffer,
int ref_y_stride,
*nearest = mvlist[0];
*near = mvlist[1];
}
-#endif
int yoffset,
int *sum,
unsigned int *sumsquared);
-
-unsigned int vp9_sub_pixel_variance16x2_sse2(const unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0, xsum1;
- unsigned int xxsum0, xxsum1;
-
- if (xoffset == HALFNDX && yoffset == 0) {
- vp9_half_horiz_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 2,
- &xsum0, &xxsum0);
- } else if (xoffset == 0 && yoffset == HALFNDX) {
- vp9_half_vert_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 2,
- &xsum0, &xxsum0);
- } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
- vp9_half_horiz_vert_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 2,
- &xsum0, &xxsum0);
- } else {
- vp9_filter_block2d_bil_var_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 2,
- xoffset, yoffset,
- &xsum0, &xxsum0);
-
- vp9_filter_block2d_bil_var_sse2(
- src_ptr + 8, src_pixels_per_line,
- dst_ptr + 8, dst_pixels_per_line, 2,
- xoffset, yoffset,
- &xsum1, &xxsum1);
- xsum0 += xsum1;
- xxsum0 += xxsum1;
- }
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 5));
-}