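Always name the buffers src/ref and use the _ptr/_stride suffixes
(src_ptr, src_stride, ref_ptr, ref_stride).
Normalize the offset parameters to [xy]_offset and the second predictor
to second_pred.
Rename some stray source_stride/recon_stride parameters to
src_stride/ref_stride.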
BUG=webm:1444
Change-Id: I32362a50988eb84464ab78686348610ea40e5c80
// 4xM filter writes an extra row to fdata because it processes two rows at a
// time.
-#define sub_pixel_varianceNxM(n, m) \
- uint32_t vpx_sub_pixel_variance##n##x##m##_neon( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
- uint8_t temp1[n * m]; \
- \
- if (n == 4) { \
- var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (m + 2), \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
- bilinear_filters[yoffset]); \
- } else if (n == 8) { \
- var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (m + 1), \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
- bilinear_filters[yoffset]); \
- } else { \
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (m + 1), n, \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
- bilinear_filters[yoffset]); \
- } \
- return vpx_variance##n##x##m(temp1, n, b, b_stride, sse); \
+#define sub_pixel_varianceNxM(n, m) \
+ uint32_t vpx_sub_pixel_variance##n##x##m##_neon( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
+ uint8_t temp1[n * m]; \
+ \
+ if (n == 4) { \
+ var_filter_block2d_bil_w4(src_ptr, temp0, src_stride, 1, (m + 2), \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
+ bilinear_filters[y_offset]); \
+ } else if (n == 8) { \
+ var_filter_block2d_bil_w8(src_ptr, temp0, src_stride, 1, (m + 1), \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
+ bilinear_filters[y_offset]); \
+ } else { \
+ var_filter_block2d_bil_w16(src_ptr, temp0, src_stride, 1, (m + 1), n, \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
+ bilinear_filters[y_offset]); \
+ } \
+ return vpx_variance##n##x##m(temp1, n, ref_ptr, ref_stride, sse); \
}
sub_pixel_varianceNxM(4, 4);
// 4xM filter writes an extra row to fdata because it processes two rows at a
// time.
-#define sub_pixel_avg_varianceNxM(n, m) \
- uint32_t vpx_sub_pixel_avg_variance##n##x##m##_neon( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
- uint8_t temp1[n * m]; \
- \
- if (n == 4) { \
- var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (m + 2), \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
- bilinear_filters[yoffset]); \
- } else if (n == 8) { \
- var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (m + 1), \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
- bilinear_filters[yoffset]); \
- } else { \
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (m + 1), n, \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
- bilinear_filters[yoffset]); \
- } \
- \
- vpx_comp_avg_pred(temp0, second_pred, n, m, temp1, n); \
- \
- return vpx_variance##n##x##m(temp0, n, b, b_stride, sse); \
+#define sub_pixel_avg_varianceNxM(n, m) \
+ uint32_t vpx_sub_pixel_avg_variance##n##x##m##_neon( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
+ uint8_t temp1[n * m]; \
+ \
+ if (n == 4) { \
+ var_filter_block2d_bil_w4(src_ptr, temp0, src_stride, 1, (m + 2), \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
+ bilinear_filters[y_offset]); \
+ } else if (n == 8) { \
+ var_filter_block2d_bil_w8(src_ptr, temp0, src_stride, 1, (m + 1), \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
+ bilinear_filters[y_offset]); \
+ } else { \
+ var_filter_block2d_bil_w16(src_ptr, temp0, src_stride, 1, (m + 1), n, \
+ bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
+ bilinear_filters[y_offset]); \
+ } \
+ \
+ vpx_comp_avg_pred(temp0, second_pred, n, m, temp1, n); \
+ \
+ return vpx_variance##n##x##m(temp0, n, ref_ptr, ref_stride, sse); \
}
sub_pixel_avg_varianceNxM(4, 4);
// this limit.
// Process a block of width 4 four rows at a time.
-static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int h, uint32_t *sse, int *sum) {
+static void variance_neon_w4x4(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
int i;
int16x8_t sum_s16 = vdupq_n_s16(0);
int32x4_t sse_lo_s32 = vdupq_n_s32(0);
assert(h <= 256);
for (i = 0; i < h; i += 4) {
- const uint8x16_t a_u8 = load_unaligned_u8q(a, a_stride);
- const uint8x16_t b_u8 = load_unaligned_u8q(b, b_stride);
+ const uint8x16_t a_u8 = load_unaligned_u8q(src_ptr, src_stride);
+ const uint8x16_t b_u8 = load_unaligned_u8q(ref_ptr, ref_stride);
const uint16x8_t diff_lo_u16 =
vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8));
const uint16x8_t diff_hi_u16 =
sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16),
vget_high_s16(diff_hi_s16));
- a += 4 * a_stride;
- b += 4 * b_stride;
+ src_ptr += 4 * src_stride;
+ ref_ptr += 4 * ref_stride;
}
*sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);
}
// Process a block of any size where the width is divisible by 16.
-static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h, uint32_t *sse,
- int *sum) {
+static void variance_neon_w16(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int w,
+ int h, uint32_t *sse, int *sum) {
int i, j;
int16x8_t sum_s16 = vdupq_n_s16(0);
int32x4_t sse_lo_s32 = vdupq_n_s32(0);
for (i = 0; i < h; ++i) {
for (j = 0; j < w; j += 16) {
- const uint8x16_t a_u8 = vld1q_u8(a + j);
- const uint8x16_t b_u8 = vld1q_u8(b + j);
+ const uint8x16_t a_u8 = vld1q_u8(src_ptr + j);
+ const uint8x16_t b_u8 = vld1q_u8(ref_ptr + j);
const uint16x8_t diff_lo_u16 =
vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8));
sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16),
vget_high_s16(diff_hi_s16));
}
- a += a_stride;
- b += b_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
*sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);
}
// Process a block of width 8 two rows at a time.
-static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int h, uint32_t *sse, int *sum) {
+static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
int i = 0;
int16x8_t sum_s16 = vdupq_n_s16(0);
int32x4_t sse_lo_s32 = vdupq_n_s32(0);
assert(h <= 128);
do {
- const uint8x8_t a_0_u8 = vld1_u8(a);
- const uint8x8_t a_1_u8 = vld1_u8(a + a_stride);
- const uint8x8_t b_0_u8 = vld1_u8(b);
- const uint8x8_t b_1_u8 = vld1_u8(b + b_stride);
+ const uint8x8_t a_0_u8 = vld1_u8(src_ptr);
+ const uint8x8_t a_1_u8 = vld1_u8(src_ptr + src_stride);
+ const uint8x8_t b_0_u8 = vld1_u8(ref_ptr);
+ const uint8x8_t b_1_u8 = vld1_u8(ref_ptr + ref_stride);
const uint16x8_t diff_0_u16 = vsubl_u8(a_0_u8, b_0_u8);
const uint16x8_t diff_1_u16 = vsubl_u8(a_1_u8, b_1_u8);
const int16x8_t diff_0_s16 = vreinterpretq_s16_u16(diff_0_u16);
vget_high_s16(diff_0_s16));
sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_1_s16),
vget_high_s16(diff_1_s16));
- a += a_stride + a_stride;
- b += b_stride + b_stride;
+ src_ptr += src_stride + src_stride;
+ ref_ptr += ref_stride + ref_stride;
i += 2;
} while (i < h);
0);
}
-void vpx_get8x8var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, unsigned int *sse, int *sum) {
- variance_neon_w8x2(a, a_stride, b, b_stride, 8, sse, sum);
+void vpx_get8x8var_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
+ variance_neon_w8x2(src_ptr, src_stride, ref_ptr, ref_stride, 8, sse, sum);
}
-void vpx_get16x16var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, unsigned int *sse, int *sum) {
- variance_neon_w16(a, a_stride, b, b_stride, 16, 16, sse, sum);
+void vpx_get16x16var_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
}
-#define varianceNxM(n, m, shift) \
- unsigned int vpx_variance##n##x##m##_neon(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- unsigned int *sse) { \
- int sum; \
- if (n == 4) \
- variance_neon_w4x4(a, a_stride, b, b_stride, m, sse, &sum); \
- else if (n == 8) \
- variance_neon_w8x2(a, a_stride, b, b_stride, m, sse, &sum); \
- else \
- variance_neon_w16(a, a_stride, b, b_stride, n, m, sse, &sum); \
- if (n * m < 16 * 16) \
- return *sse - ((sum * sum) >> shift); \
- else \
- return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \
+#define varianceNxM(n, m, shift) \
+ unsigned int vpx_variance##n##x##m##_neon( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, unsigned int *sse) { \
+ int sum; \
+ if (n == 4) \
+ variance_neon_w4x4(src_ptr, src_stride, ref_ptr, ref_stride, m, sse, \
+ &sum); \
+ else if (n == 8) \
+ variance_neon_w8x2(src_ptr, src_stride, ref_ptr, ref_stride, m, sse, \
+ &sum); \
+ else \
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, n, m, sse, \
+ &sum); \
+ if (n * m < 16 * 16) \
+ return *sse - ((sum * sum) >> shift); \
+ else \
+ return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \
}
varianceNxM(4, 4, 4);
varianceNxM(32, 16, 9);
varianceNxM(32, 32, 10);
-unsigned int vpx_variance32x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum1, sum2;
uint32_t sse1, sse2;
- variance_neon_w16(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1);
- variance_neon_w16(a + (32 * a_stride), a_stride, b + (32 * b_stride),
- b_stride, 32, 32, &sse2, &sum2);
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32, &sse1,
+ &sum1);
+ variance_neon_w16(src_ptr + (32 * src_stride), src_stride,
+ ref_ptr + (32 * ref_stride), ref_stride, 32, 32, &sse2,
+ &sum2);
*sse = sse1 + sse2;
sum1 += sum2;
return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11);
}
-unsigned int vpx_variance64x32_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum1, sum2;
uint32_t sse1, sse2;
- variance_neon_w16(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w16(a + (16 * a_stride), a_stride, b + (16 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 64, 16, &sse1,
+ &sum1);
+ variance_neon_w16(src_ptr + (16 * src_stride), src_stride,
+ ref_ptr + (16 * ref_stride), ref_stride, 64, 16, &sse2,
+ &sum2);
*sse = sse1 + sse2;
sum1 += sum2;
return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11);
}
-unsigned int vpx_variance64x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum1, sum2;
uint32_t sse1, sse2;
- variance_neon_w16(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w16(a + (16 * a_stride), a_stride, b + (16 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
+ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 64, 16, &sse1,
+ &sum1);
+ variance_neon_w16(src_ptr + (16 * src_stride), src_stride,
+ ref_ptr + (16 * ref_stride), ref_stride, 64, 16, &sse2,
+ &sum2);
sse1 += sse2;
sum1 += sum2;
- variance_neon_w16(a + (16 * 2 * a_stride), a_stride, b + (16 * 2 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
+ variance_neon_w16(src_ptr + (16 * 2 * src_stride), src_stride,
+ ref_ptr + (16 * 2 * ref_stride), ref_stride, 64, 16, &sse2,
+ &sum2);
sse1 += sse2;
sum1 += sum2;
- variance_neon_w16(a + (16 * 3 * a_stride), a_stride, b + (16 * 3 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
+ variance_neon_w16(src_ptr + (16 * 3 * src_stride), src_stride,
+ ref_ptr + (16 * 3 * ref_stride), ref_stride, 64, 16, &sse2,
+ &sum2);
*sse = sse1 + sse2;
sum1 += sum2;
return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12);
}
-unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int source_stride,
- const unsigned char *ref_ptr, int recon_stride,
+unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
for (i = 0; i < 8; i++) { // mse16x16_neon_loop
q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
}
-unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr,
- int source_stride,
+unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr,
- int recon_stride) {
+ int ref_stride) {
int16x4_t d22s16, d24s16, d26s16, d28s16;
int64x1_t d0s64;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
int64x2_t q1s64;
d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
d1u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
d5u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
d3u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
+ src_ptr += src_stride;
d7u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
+ ref_ptr += ref_stride;
q11u16 = vsubl_u8(d0u8, d4u8);
q12u16 = vsubl_u8(d1u8, d5u8);
#define VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(wd, ht) \
uint32_t vpx_sub_pixel_variance##wd##x##ht##_msa( \
- const uint8_t *src, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref, int32_t ref_stride, \
+ const uint8_t *src, int32_t src_stride, int32_t x_offset, \
+ int32_t y_offset, const uint8_t *ref, int32_t ref_stride, \
uint32_t *sse) { \
int32_t diff; \
uint32_t var; \
- const uint8_t *h_filter = bilinear_filters_msa[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_msa[yoffset]; \
+ const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \
+ const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \
\
- if (yoffset) { \
- if (xoffset) { \
+ if (y_offset) { \
+ if (x_offset) { \
*sse = sub_pixel_sse_diff_##wd##width_hv_msa( \
src, src_stride, ref, ref_stride, h_filter, v_filter, ht, &diff); \
} else { \
\
var = VARIANCE_##wd##Wx##ht##H(*sse, diff); \
} else { \
- if (xoffset) { \
+ if (x_offset) { \
*sse = sub_pixel_sse_diff_##wd##width_h_msa( \
src, src_stride, ref, ref_stride, h_filter, ht, &diff); \
\
#define VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(wd, ht) \
uint32_t vpx_sub_pixel_avg_variance##wd##x##ht##_msa( \
- const uint8_t *src_ptr, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref_ptr, int32_t ref_stride, \
+ const uint8_t *src_ptr, int32_t src_stride, int32_t x_offset, \
+ int32_t y_offset, const uint8_t *ref_ptr, int32_t ref_stride, \
uint32_t *sse, const uint8_t *sec_pred) { \
int32_t diff; \
- const uint8_t *h_filter = bilinear_filters_msa[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_msa[yoffset]; \
+ const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \
+ const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \
\
- if (yoffset) { \
- if (xoffset) { \
+ if (y_offset) { \
+ if (x_offset) { \
*sse = sub_pixel_avg_sse_diff_##wd##width_hv_msa( \
src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \
v_filter, ht, &diff); \
&diff); \
} \
} else { \
- if (xoffset) { \
+ if (x_offset) { \
*sse = sub_pixel_avg_sse_diff_##wd##width_h_msa( \
src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \
&diff); \
uint32_t vpx_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr,
int32_t src_stride,
- int32_t xoffset, int32_t yoffset,
+ int32_t x_offset, int32_t y_offset,
const uint8_t *ref_ptr,
int32_t ref_stride, uint32_t *sse,
const uint8_t *sec_pred) {
int32_t diff;
- const uint8_t *h_filter = bilinear_filters_msa[xoffset];
- const uint8_t *v_filter = bilinear_filters_msa[yoffset];
+ const uint8_t *h_filter = bilinear_filters_msa[x_offset];
+ const uint8_t *v_filter = bilinear_filters_msa[y_offset];
- if (yoffset) {
- if (xoffset) {
+ if (y_offset) {
+ if (x_offset) {
*sse = sub_pixel_avg_sse_diff_32width_hv_msa(
src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter,
v_filter, 64, &diff);
v_filter, 64, &diff);
}
} else {
- if (xoffset) {
+ if (x_offset) {
*sse = sub_pixel_avg_sse_diff_32width_h_msa(src_ptr, src_stride, ref_ptr,
ref_stride, sec_pred,
h_filter, 64, &diff);
#define VPX_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(ht) \
uint32_t vpx_sub_pixel_avg_variance64x##ht##_msa( \
- const uint8_t *src_ptr, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref_ptr, int32_t ref_stride, \
+ const uint8_t *src_ptr, int32_t src_stride, int32_t x_offset, \
+ int32_t y_offset, const uint8_t *ref_ptr, int32_t ref_stride, \
uint32_t *sse, const uint8_t *sec_pred) { \
int32_t diff; \
- const uint8_t *h_filter = bilinear_filters_msa[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_msa[yoffset]; \
+ const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \
+ const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \
\
- if (yoffset) { \
- if (xoffset) { \
+ if (y_offset) { \
+ if (x_offset) { \
*sse = sub_pixel_avg_sse_diff_64width_hv_msa( \
src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \
v_filter, ht, &diff); \
&diff); \
} \
} else { \
- if (xoffset) { \
+ if (x_offset) { \
*sse = sub_pixel_avg_sse_diff_64width_h_msa( \
src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \
&diff); \
"paddh %[ftmp12], %[ftmp12], %[ftmp6] \n\t"
#define VARIANCE_SSE_8 \
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \
- "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" \
- "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \
+ "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" \
+ "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" \
"pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \
"punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \
"punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \
#define VARIANCE_SSE_16 \
VARIANCE_SSE_8 \
- "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" \
- "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t" \
- "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \
+ "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" \
+ "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" \
"pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \
"punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \
"punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \
#define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A \
/* calculate fdata3[0]~fdata3[3], store at ftmp2*/ \
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
- "gsldlc1 %[ftmp1], 0x08(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x01(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
"pmullh %[ftmp2], %[ftmp2], %[filter_x0] \n\t" \
"paddh %[ftmp2], %[ftmp2], %[ff_ph_40] \n\t" \
#define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B \
/* calculate fdata3[0]~fdata3[3], store at ftmp4*/ \
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \
- "gsldlc1 %[ftmp1], 0x08(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x01(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \
"pmullh %[ftmp4], %[ftmp4], %[filter_x0] \n\t" \
"paddh %[ftmp4], %[ftmp4], %[ff_ph_40] \n\t" \
#define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A \
/* calculate fdata3[0]~fdata3[7], store at ftmp2 and ftmp3*/ \
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
"punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
- "gsldlc1 %[ftmp1], 0x08(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x01(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \
"punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \
"pmullh %[ftmp2], %[ftmp2], %[filter_x0] \n\t" \
#define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B \
/* calculate fdata3[0]~fdata3[7], store at ftmp8 and ftmp9*/ \
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t" \
"punpckhbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" \
- "gsldlc1 %[ftmp1], 0x08(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x01(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" \
"punpckhbh %[ftmp11], %[ftmp1], %[ftmp0] \n\t" \
"pmullh %[ftmp8], %[ftmp8], %[filter_x0] \n\t" \
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A \
\
/* calculate fdata3[8]~fdata3[15], store at ftmp4 and ftmp5*/ \
- "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \
"punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \
- "gsldlc1 %[ftmp1], 0x10(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x09(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x09(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \
"punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t" \
"pmullh %[ftmp4], %[ftmp4], %[filter_x0] \n\t" \
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B \
\
/* calculate fdata3[8]~fdata3[15], store at ftmp10 and ftmp11*/ \
- "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" \
"punpckhbh %[ftmp11], %[ftmp1], %[ftmp0] \n\t" \
- "gsldlc1 %[ftmp1], 0x10(%[a]) \n\t" \
- "gsldrc1 %[ftmp1], 0x09(%[a]) \n\t" \
+ "gsldlc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x09(%[src_ptr]) \n\t" \
"punpcklbh %[ftmp12], %[ftmp1], %[ftmp0] \n\t" \
"punpckhbh %[ftmp13], %[ftmp1], %[ftmp0] \n\t" \
"pmullh %[ftmp10], %[ftmp10], %[filter_x0] \n\t" \
// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
// It defines the offset required to move from one input to the next.
-static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const uint8_t *filter) {
+static void var_filter_block2d_bil_first_pass(
+ const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
+ int pixel_step, unsigned int output_height, unsigned int output_width,
+ const uint8_t *filter) {
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
- b[j] = ROUND_POWER_OF_TWO(
- (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
+ ref_ptr[j] = ROUND_POWER_OF_TWO(
+ (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+ FILTER_BITS);
- ++a;
+ ++src_ptr;
}
- a += src_pixels_per_line - output_width;
- b += output_width;
+ src_ptr += src_pixels_per_line - output_width;
+ ref_ptr += output_width;
}
}
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one input
// to the next. Output is 8-bit.
-static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const uint8_t *filter) {
+static void var_filter_block2d_bil_second_pass(
+ const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
+ unsigned int pixel_step, unsigned int output_height,
+ unsigned int output_width, const uint8_t *filter) {
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
- b[j] = ROUND_POWER_OF_TWO(
- (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
- ++a;
+ ref_ptr[j] = ROUND_POWER_OF_TWO(
+ (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+ FILTER_BITS);
+ ++src_ptr;
}
- a += src_pixels_per_line - output_width;
- b += output_width;
+ src_ptr += src_pixels_per_line - output_width;
+ ref_ptr += output_width;
}
}
-static inline uint32_t vpx_variance64x(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sse, int high) {
int sum;
double ftmp[12];
"xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x17(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x10(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x17(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x10(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x10(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x1f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x18(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x1f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x18(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x27(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x20(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x27(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x20(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x27(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x20(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x27(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x20(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x2f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x28(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x2f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x28(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x2f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x28(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x2f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x28(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x37(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x30(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x37(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x30(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x37(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x30(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x37(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x30(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x3f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x38(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x3f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x38(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x3f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x38(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x3f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x38(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
"addiu %[tmp0], %[tmp0], -0x01 \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
- MMI_ADDU(%[b], %[b], %[b_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
+ MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t"
"mfc1 %[tmp1], %[ftmp9] \n\t"
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
[tmp2]"=&r"(tmp[2]),
- [a]"+&r"(a), [b]"+&r"(b),
+ [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr),
[sum]"=&r"(sum)
- : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride),
+ : [src_stride]"r"((mips_reg)src_stride),
+ [ref_stride]"r"((mips_reg)ref_stride),
[high]"r"(&high), [sse]"r"(sse)
: "memory"
);
return *sse - (((int64_t)sum * sum) / (64 * high));
}
-#define VPX_VARIANCE64XN(n) \
- uint32_t vpx_variance64x##n##_mmi(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- return vpx_variance64x(a, a_stride, b, b_stride, sse, n); \
+#define VPX_VARIANCE64XN(n) \
+ uint32_t vpx_variance64x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ return vpx_variance64x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
}
VPX_VARIANCE64XN(64)
VPX_VARIANCE64XN(32)
-uint32_t vpx_variance32x64_mmi(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, uint32_t *sse) {
+uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ uint32_t *sse) {
int sum;
double ftmp[12];
uint32_t tmp[3];
"xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x17(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x10(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x17(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x10(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x10(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
- "gsldlc1 %[ftmp1], 0x1f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x18(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x1f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x18(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8_FOR_W64
"addiu %[tmp0], %[tmp0], -0x01 \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
- MMI_ADDU(%[b], %[b], %[b_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
+ MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t"
"mfc1 %[tmp1], %[ftmp9] \n\t"
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
[tmp2]"=&r"(tmp[2]),
- [a]"+&r"(a), [b]"+&r"(b),
+ [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr),
[sum]"=&r"(sum)
- : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride),
+ : [src_stride]"r"((mips_reg)src_stride),
+ [ref_stride]"r"((mips_reg)ref_stride),
[sse]"r"(sse)
: "memory"
);
return *sse - (((int64_t)sum * sum) / 2048);
}
-static inline uint32_t vpx_variance32x(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sse, int high) {
int sum;
double ftmp[13];
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8
- "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8
- "gsldlc1 %[ftmp1], 0x17(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x10(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x17(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x10(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x10(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8
- "gsldlc1 %[ftmp1], 0x1f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x18(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x1f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x18(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8
"addiu %[tmp0], %[tmp0], -0x01 \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
- MMI_ADDU(%[b], %[b], %[b_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
+ MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
- [a]"+&r"(a), [b]"+&r"(b)
- : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride),
+ [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr)
+ : [src_stride]"r"((mips_reg)src_stride),
+ [ref_stride]"r"((mips_reg)ref_stride),
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory"
);
return *sse - (((int64_t)sum * sum) / (32 * high));
}
// VPX_VARIANCE32XN(n) generates vpx_variance32x<n>_mmi() for a 32-wide block
// of height n by forwarding all arguments to vpx_variance32x(), with n passed
// as that helper's `high` parameter. Instantiated below for heights 32 and 16
// only; the 32x64 size has its own hand-written function in this file.
-#define VPX_VARIANCE32XN(n) \
- uint32_t vpx_variance32x##n##_mmi(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- return vpx_variance32x(a, a_stride, b, b_stride, sse, n); \
+#define VPX_VARIANCE32XN(n) \
+ uint32_t vpx_variance32x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ return vpx_variance32x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
}
VPX_VARIANCE32XN(32)
VPX_VARIANCE32XN(16)
-static inline uint32_t vpx_variance16x(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sse, int high) {
int sum;
double ftmp[13];
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8
- "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8
"addiu %[tmp0], %[tmp0], -0x01 \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
- MMI_ADDU(%[b], %[b], %[b_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
+ MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
- [a]"+&r"(a), [b]"+&r"(b)
- : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride),
+ [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr)
+ : [src_stride]"r"((mips_reg)src_stride),
+ [ref_stride]"r"((mips_reg)ref_stride),
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory"
);
return *sse - (((int64_t)sum * sum) / (16 * high));
}
// VPX_VARIANCE16XN(n) generates vpx_variance16x<n>_mmi() for a 16-wide block
// of height n. The wrapper forwards its arguments unchanged to
// vpx_variance16x() and supplies n as the helper's `high` parameter.
// Instantiated below for heights 32, 16 and 8.
-#define VPX_VARIANCE16XN(n) \
- uint32_t vpx_variance16x##n##_mmi(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- return vpx_variance16x(a, a_stride, b, b_stride, sse, n); \
+#define VPX_VARIANCE16XN(n) \
+ uint32_t vpx_variance16x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ return vpx_variance16x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
}
VPX_VARIANCE16XN(32)
VPX_VARIANCE16XN(16)
VPX_VARIANCE16XN(8)
-static inline uint32_t vpx_variance8x(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sse, int high) {
int sum;
double ftmp[13];
"xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_8
"addiu %[tmp0], %[tmp0], -0x01 \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
- MMI_ADDU(%[b], %[b], %[b_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
+ MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
- [a]"+&r"(a), [b]"+&r"(b)
- : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride),
+ [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr)
+ : [src_stride]"r"((mips_reg)src_stride),
+ [ref_stride]"r"((mips_reg)ref_stride),
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory"
);
return *sse - (((int64_t)sum * sum) / (8 * high));
}
// VPX_VARIANCE8XN(n) generates vpx_variance8x<n>_mmi() for an 8-wide block of
// height n. The wrapper forwards its arguments unchanged to vpx_variance8x()
// and supplies n as the helper's `high` parameter.
// Instantiated below for heights 16, 8 and 4.
-#define VPX_VARIANCE8XN(n) \
- uint32_t vpx_variance8x##n##_mmi(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- return vpx_variance8x(a, a_stride, b, b_stride, sse, n); \
+#define VPX_VARIANCE8XN(n) \
+ uint32_t vpx_variance8x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ return vpx_variance8x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
}
VPX_VARIANCE8XN(16)
VPX_VARIANCE8XN(8)
VPX_VARIANCE8XN(4)
-static inline uint32_t vpx_variance4x(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
uint32_t *sse, int high) {
int sum;
double ftmp[12];
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t"
+ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
+ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
+ "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t"
+ "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t"
VARIANCE_SSE_SUM_4
"addiu %[tmp0], %[tmp0], -0x01 \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
- MMI_ADDU(%[b], %[b], %[b_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
+ MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp6], %[ftmp10] \n\t"
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]),
[tmp0]"=&r"(tmp[0]),
- [a]"+&r"(a), [b]"+&r"(b)
- : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride),
+ [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr)
+ : [src_stride]"r"((mips_reg)src_stride),
+ [ref_stride]"r"((mips_reg)ref_stride),
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory"
);
return *sse - (((int64_t)sum * sum) / (4 * high));
}
// VPX_VARIANCE4XN(n) generates vpx_variance4x<n>_mmi() for a 4-wide block of
// height n. The wrapper forwards its arguments unchanged to vpx_variance4x()
// and supplies n as the helper's `high` parameter.
// Instantiated below for heights 8 and 4.
-#define VPX_VARIANCE4XN(n) \
- uint32_t vpx_variance4x##n##_mmi(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- return vpx_variance4x(a, a_stride, b, b_stride, sse, n); \
+#define VPX_VARIANCE4XN(n) \
+ uint32_t vpx_variance4x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ return vpx_variance4x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
}
VPX_VARIANCE4XN(8)
VPX_VARIANCE4XN(4)
-static inline uint32_t vpx_mse16x(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride, uint32_t *sse,
- uint64_t high) {
+static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ uint32_t *sse, uint64_t high) {
double ftmp[12];
uint32_t tmp[1];
VARIANCE_SSE_16
"addiu %[tmp0], %[tmp0], -0x01 \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
- MMI_ADDU(%[b], %[b], %[b_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
+ MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]),
- [a]"+&r"(a), [b]"+&r"(b)
- : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride),
+ [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr)
+ : [src_stride]"r"((mips_reg)src_stride),
+ [ref_stride]"r"((mips_reg)ref_stride),
[high]"r"(&high), [sse]"r"(sse)
: "memory"
);
return *sse;
}
// vpx_mse16xN(n) generates vpx_mse16x<n>_mmi() for a 16-wide block of height
// n. The wrapper forwards its arguments to vpx_mse16x(); n is passed as the
// helper's final `high` parameter, which is declared uint64_t, so the integer
// literal is converted at the call. vpx_mse16x() returns *sse, so the wrapper
// both stores the result through sse and returns it.
// NOTE(review): each instantiation below is followed by ';'. Because the macro
// already expands to a complete function definition ending in '}', that ';'
// is an empty file-scope declaration, which strict ISO C does not permit
// (compilers typically only warn under -Wpedantic).
-#define vpx_mse16xN(n) \
- uint32_t vpx_mse16x##n##_mmi(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- return vpx_mse16x(a, a_stride, b, b_stride, sse, n); \
+#define vpx_mse16xN(n) \
+ uint32_t vpx_mse16x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ return vpx_mse16x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
}
vpx_mse16xN(16);
vpx_mse16xN(8);
-static inline uint32_t vpx_mse8x(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride, uint32_t *sse,
- uint64_t high) {
+static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ uint32_t *sse, uint64_t high) {
double ftmp[12];
uint32_t tmp[1];
VARIANCE_SSE_8
"addiu %[tmp0], %[tmp0], -0x01 \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
- MMI_ADDU(%[b], %[b], %[b_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
+ MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
"bnez %[tmp0], 1b \n\t"
"dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]),
- [a]"+&r"(a), [b]"+&r"(b)
- : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride),
+ [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr)
+ : [src_stride]"r"((mips_reg)src_stride),
+ [ref_stride]"r"((mips_reg)ref_stride),
[high]"r"(&high), [sse]"r"(sse)
: "memory"
);
return *sse;
}
// vpx_mse8xN(n) generates vpx_mse8x<n>_mmi() for an 8-wide block of height n.
// The wrapper forwards its arguments to vpx_mse8x(); n is passed as the
// helper's final `high` parameter (declared uint64_t). vpx_mse8x() returns
// *sse, so the wrapper both stores the result through sse and returns it.
// NOTE(review): as with the 16-wide variant, the ';' after each instantiation
// below is an empty file-scope declaration that strict ISO C does not permit.
-#define vpx_mse8xN(n) \
- uint32_t vpx_mse8x##n##_mmi(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- return vpx_mse8x(a, a_stride, b, b_stride, sse, n); \
+#define vpx_mse8xN(n) \
+ uint32_t vpx_mse8x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ return vpx_mse8x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
}
vpx_mse8xN(16);
vpx_mse8xN(8);
// SUBPIX_VAR(W, H) defines vpx_sub_pixel_variance<W>x<H>_mmi(). The generated
// function filters the source block with the two generic bilinear passes and
// then measures the variance of the filtered block against ref_ptr:
//   1. First pass: src_ptr -> fdata3 (uint16_t, (H + 1) * W entries) with
//      bilinear_filters[x_offset]. H + 1 rows are requested, presumably so
//      the vertical pass has the one extra row it reads -- TODO confirm.
//   2. Second pass: fdata3 -> temp2 (uint8_t, H * W entries) with
//      bilinear_filters[y_offset].
//   3. vpx_variance<W>x<H>_mmi(temp2, W, ref_ptr, ref_stride, sse) supplies
//      the return value and fills *sse.
// NOTE(review): x_offset and y_offset index bilinear_filters[] with no range
// check in this macro; callers are assumed to pass valid indices.
-#define SUBPIX_VAR(W, H) \
- uint32_t vpx_sub_pixel_variance##W##x##H##_mmi( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- \
- var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- return vpx_variance##W##x##H##_mmi(temp2, W, b, b_stride, sse); \
+#define SUBPIX_VAR(W, H) \
+ uint32_t vpx_sub_pixel_variance##W##x##H##_mmi( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ \
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ return vpx_variance##W##x##H##_mmi(temp2, W, ref_ptr, ref_stride, sse); \
}
SUBPIX_VAR(64, 64)
SUBPIX_VAR(32, 16)
SUBPIX_VAR(16, 32)
-static inline void var_filter_block2d_bil_16x(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- uint8_t *temp2, int counter) {
+static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr,
+ int src_stride, int x_offset,
+ int y_offset, uint8_t *temp2,
+ int counter) {
uint8_t *temp2_ptr = temp2;
mips_reg l_counter = counter;
double ftmp[15];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL };
- const uint8_t *filter_x = bilinear_filters[xoffset];
- const uint8_t *filter_y = bilinear_filters[yoffset];
+ const uint8_t *filter_x = bilinear_filters[x_offset];
+ const uint8_t *filter_y = bilinear_filters[y_offset];
__asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
// fdata3: fdata3[0] ~ fdata3[15]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A
- // fdata3 +a_stride*1: fdata3[0] ~ fdata3[15]
- MMI_ADDU(%[a], %[a], %[a_stride])
+ // fdata3 +src_stride*1: fdata3[0] ~ fdata3[15]
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B
// temp2: temp2[0] ~ temp2[15]
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A
- // fdata3 +a_stride*2: fdata3[0] ~ fdata3[15]
- MMI_ADDU(%[a], %[a], %[a_stride])
+ // fdata3 +src_stride*2: fdata3[0] ~ fdata3[15]
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A
// temp2+16*1: temp2[0] ~ temp2[15]
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B
"1: \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A
- MMI_ADDU(%[a], %[a], %[a_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B
[ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
[ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
[ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]),
- [tmp0] "=&r"(tmp[0]), [a] "+&r"(a), [temp2_ptr] "+&r"(temp2_ptr),
+ [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr),
[counter]"+&r"(l_counter)
: [filter_x0] "f"((uint64_t)filter_x[0]),
[filter_x1] "f"((uint64_t)filter_x[1]),
[filter_y0] "f"((uint64_t)filter_y[0]),
[filter_y1] "f"((uint64_t)filter_y[1]),
- [a_stride] "r"((mips_reg)a_stride), [ff_ph_40] "f"(ff_ph_40),
+ [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
[mask] "f"(mask)
: "memory"
);
}
// SUBPIX_VAR16XN(H) defines vpx_sub_pixel_variance16x<H>_mmi(). The block is
// filtered into the 16 * H byte buffer temp2 by var_filter_block2d_bil_16x(),
// and the result of vpx_variance16x<H>_mmi(temp2, 16, ref_ptr, ref_stride,
// sse) is returned.
// The last argument to the helper, (H - 2) / 2, is its loop counter. The
// helper appears to emit two output rows before its loop and two more per
// iteration, which would total H rows -- TODO confirm against the asm.
-#define SUBPIX_VAR16XN(H) \
- uint32_t vpx_sub_pixel_variance16x##H##_mmi( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- uint8_t temp2[16 * H]; \
- var_filter_block2d_bil_16x(a, a_stride, xoffset, yoffset, temp2, \
- (H - 2) / 2); \
- \
- return vpx_variance16x##H##_mmi(temp2, 16, b, b_stride, sse); \
+#define SUBPIX_VAR16XN(H) \
+ uint32_t vpx_sub_pixel_variance16x##H##_mmi( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint8_t temp2[16 * H]; \
+ var_filter_block2d_bil_16x(src_ptr, src_stride, x_offset, y_offset, temp2, \
+ (H - 2) / 2); \
+ \
+ return vpx_variance16x##H##_mmi(temp2, 16, ref_ptr, ref_stride, sse); \
}
SUBPIX_VAR16XN(16)
SUBPIX_VAR16XN(8)
-static inline void var_filter_block2d_bil_8x(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- uint8_t *temp2, int counter) {
+static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr,
+ int src_stride, int x_offset,
+ int y_offset, uint8_t *temp2,
+ int counter) {
uint8_t *temp2_ptr = temp2;
mips_reg l_counter = counter;
double ftmp[15];
mips_reg tmp[2];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL };
- const uint8_t *filter_x = bilinear_filters[xoffset];
- const uint8_t *filter_y = bilinear_filters[yoffset];
+ const uint8_t *filter_x = bilinear_filters[x_offset];
+ const uint8_t *filter_y = bilinear_filters[y_offset];
__asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
// fdata3: fdata3[0] ~ fdata3[7]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A
- // fdata3 +a_stride*1: fdata3[0] ~ fdata3[7]
- MMI_ADDU(%[a], %[a], %[a_stride])
+ // fdata3 +src_stride*1: fdata3[0] ~ fdata3[7]
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B
// temp2: temp2[0] ~ temp2[7]
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A
- // fdata3 +a_stride*2: fdata3[0] ~ fdata3[7]
- MMI_ADDU(%[a], %[a], %[a_stride])
+ // fdata3 +src_stride*2: fdata3[0] ~ fdata3[7]
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A
// temp2+8*1: temp2[0] ~ temp2[7]
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B
"1: \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A
- MMI_ADDU(%[a], %[a], %[a_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B
[ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
[ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
[ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]),
- [tmp0] "=&r"(tmp[0]), [a] "+&r"(a), [temp2_ptr] "+&r"(temp2_ptr),
+ [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr),
[counter]"+&r"(l_counter)
: [filter_x0] "f"((uint64_t)filter_x[0]),
[filter_x1] "f"((uint64_t)filter_x[1]),
[filter_y0] "f"((uint64_t)filter_y[0]),
[filter_y1] "f"((uint64_t)filter_y[1]),
- [a_stride] "r"((mips_reg)a_stride), [ff_ph_40] "f"(ff_ph_40),
+ [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
[mask] "f"(mask)
: "memory"
);
}
// SUBPIX_VAR8XN(H) defines vpx_sub_pixel_variance8x<H>_mmi(). The block is
// filtered into the 8 * H byte buffer temp2 by var_filter_block2d_bil_8x(),
// and the result of vpx_variance8x<H>_mmi(temp2, 8, ref_ptr, ref_stride, sse)
// is returned.
// The last argument to the helper, (H - 2) / 2, is its loop counter. The
// helper appears to emit two output rows before its loop and two more per
// iteration, which would total H rows -- TODO confirm against the asm.
-#define SUBPIX_VAR8XN(H) \
- uint32_t vpx_sub_pixel_variance8x##H##_mmi( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- uint8_t temp2[8 * H]; \
- var_filter_block2d_bil_8x(a, a_stride, xoffset, yoffset, temp2, \
- (H - 2) / 2); \
- \
- return vpx_variance8x##H##_mmi(temp2, 8, b, b_stride, sse); \
+#define SUBPIX_VAR8XN(H) \
+ uint32_t vpx_sub_pixel_variance8x##H##_mmi( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint8_t temp2[8 * H]; \
+ var_filter_block2d_bil_8x(src_ptr, src_stride, x_offset, y_offset, temp2, \
+ (H - 2) / 2); \
+ \
+ return vpx_variance8x##H##_mmi(temp2, 8, ref_ptr, ref_stride, sse); \
}
SUBPIX_VAR8XN(16)
SUBPIX_VAR8XN(8)
SUBPIX_VAR8XN(4)
-static inline void var_filter_block2d_bil_4x(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- uint8_t *temp2, int counter) {
+static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr,
+ int src_stride, int x_offset,
+ int y_offset, uint8_t *temp2,
+ int counter) {
uint8_t *temp2_ptr = temp2;
mips_reg l_counter = counter;
double ftmp[7];
mips_reg tmp[2];
DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL };
- const uint8_t *filter_x = bilinear_filters[xoffset];
- const uint8_t *filter_y = bilinear_filters[yoffset];
+ const uint8_t *filter_x = bilinear_filters[x_offset];
+ const uint8_t *filter_y = bilinear_filters[y_offset];
__asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
// fdata3: fdata3[0] ~ fdata3[3]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A
- // fdata3 +a_stride*1: fdata3[0] ~ fdata3[3]
- MMI_ADDU(%[a], %[a], %[a_stride])
+ // fdata3 +src_stride*1: fdata3[0] ~ fdata3[3]
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B
// temp2: temp2[0] ~ temp2[7]
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A
- // fdata3 +a_stride*2: fdata3[0] ~ fdata3[3]
- MMI_ADDU(%[a], %[a], %[a_stride])
+ // fdata3 +src_stride*2: fdata3[0] ~ fdata3[3]
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A
// temp2+4*1: temp2[0] ~ temp2[7]
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B
"1: \n\t"
- MMI_ADDU(%[a], %[a], %[a_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A
- MMI_ADDU(%[a], %[a], %[a_stride])
+ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A
MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04)
VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B
"bnez %[counter], 1b \n\t"
: [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
[ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
- [ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [a] "+&r"(a),
+ [ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr),
[temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter)
: [filter_x0] "f"((uint64_t)filter_x[0]),
[filter_x1] "f"((uint64_t)filter_x[1]),
[filter_y0] "f"((uint64_t)filter_y[0]),
[filter_y1] "f"((uint64_t)filter_y[1]),
- [a_stride] "r"((mips_reg)a_stride), [ff_ph_40] "f"(ff_ph_40),
+ [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
[mask] "f"(mask)
: "memory"
);
}
// SUBPIX_VAR4XN(H) defines vpx_sub_pixel_variance4x<H>_mmi(). The block is
// filtered into the 4 * H byte buffer temp2 by var_filter_block2d_bil_4x(),
// and the result of vpx_variance4x<H>_mmi(temp2, 4, ref_ptr, ref_stride, sse)
// is returned.
// The last argument to the helper, (H - 2) / 2, is its loop counter. The
// helper appears to emit two output rows before its loop and two more per
// iteration, which would total H rows -- TODO confirm against the asm.
-#define SUBPIX_VAR4XN(H) \
- uint32_t vpx_sub_pixel_variance4x##H##_mmi( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- uint8_t temp2[4 * H]; \
- var_filter_block2d_bil_4x(a, a_stride, xoffset, yoffset, temp2, \
- (H - 2) / 2); \
- \
- return vpx_variance4x##H##_mmi(temp2, 4, b, b_stride, sse); \
+#define SUBPIX_VAR4XN(H) \
+ uint32_t vpx_sub_pixel_variance4x##H##_mmi( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint8_t temp2[4 * H]; \
+ var_filter_block2d_bil_4x(src_ptr, src_stride, x_offset, y_offset, temp2, \
+ (H - 2) / 2); \
+ \
+ return vpx_variance4x##H##_mmi(temp2, 4, ref_ptr, ref_stride, sse); \
}
SUBPIX_VAR4XN(8)
SUBPIX_VAR4XN(4)
// SUBPIX_AVG_VAR(W, H) defines vpx_sub_pixel_avg_variance<W>x<H>_mmi(), which
// takes an extra second_pred argument compared with the plain sub-pixel
// variance functions. Steps performed by the generated function:
//   1. First bilinear pass: src_ptr -> fdata3 (uint16_t, (H + 1) * W) with
//      bilinear_filters[x_offset].
//   2. Second bilinear pass: fdata3 -> temp2 (uint8_t, H * W) with
//      bilinear_filters[y_offset].
//   3. vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W) -- presumably
//      averages the filtered block with second_pred into temp3; confirm
//      against that function's definition.
//   4. vpx_variance<W>x<H>_mmi(temp3, W, ref_ptr, ref_stride, sse) supplies
//      the return value and fills *sse.
// temp3 is declared through DECLARE_ALIGNED with an alignment of 16.
-#define SUBPIX_AVG_VAR(W, H) \
- uint32_t vpx_sub_pixel_avg_variance##W##x##H##_mmi( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
- \
- var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \
- \
- return vpx_variance##W##x##H##_mmi(temp3, W, b, b_stride, sse); \
+#define SUBPIX_AVG_VAR(W, H) \
+ uint32_t vpx_sub_pixel_avg_variance##W##x##H##_mmi( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \
+ \
+ return vpx_variance##W##x##H##_mmi(temp3, W, ref_ptr, ref_stride, sse); \
}
SUBPIX_AVG_VAR(64, 64)
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ppc/types_vsx.h"
-uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride) {
+uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride) {
int distortion;
- const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride));
- const int16x8_t a1 = unpack_to_s16_h(read4x2(a + a_stride * 2, a_stride));
- const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride));
- const int16x8_t b1 = unpack_to_s16_h(read4x2(b + b_stride * 2, b_stride));
+ const int16x8_t a0 = unpack_to_s16_h(read4x2(src_ptr, src_stride));
+ const int16x8_t a1 =
+ unpack_to_s16_h(read4x2(src_ptr + src_stride * 2, src_stride));
+ const int16x8_t b0 = unpack_to_s16_h(read4x2(ref_ptr, ref_stride));
+ const int16x8_t b1 =
+ unpack_to_s16_h(read4x2(ref_ptr + ref_stride * 2, ref_stride));
const int16x8_t d0 = vec_sub(a0, b0);
const int16x8_t d1 = vec_sub(a1, b1);
const int32x4_t ds = vec_msum(d1, d1, vec_msum(d0, d0, vec_splat_s32(0)));
}
// TODO(lu_zero): Unroll
-uint32_t vpx_get_mb_ss_vsx(const int16_t *a) {
+uint32_t vpx_get_mb_ss_vsx(const int16_t *src_ptr) {
unsigned int i, sum = 0;
int32x4_t s = vec_splat_s32(0);
for (i = 0; i < 256; i += 8) {
- const int16x8_t v = vec_vsx_ld(0, a + i);
+ const int16x8_t v = vec_vsx_ld(0, src_ptr + i);
s = vec_msum(v, v, s);
}
}
}
-static INLINE void variance_inner_32(const uint8_t *a, const uint8_t *b,
+static INLINE void variance_inner_32(const uint8_t *src_ptr,
+ const uint8_t *ref_ptr,
int32x4_t *sum_squared, int32x4_t *sum) {
int32x4_t s = *sum;
int32x4_t ss = *sum_squared;
- const uint8x16_t va0 = vec_vsx_ld(0, a);
- const uint8x16_t vb0 = vec_vsx_ld(0, b);
- const uint8x16_t va1 = vec_vsx_ld(16, a);
- const uint8x16_t vb1 = vec_vsx_ld(16, b);
+ const uint8x16_t va0 = vec_vsx_ld(0, src_ptr);
+ const uint8x16_t vb0 = vec_vsx_ld(0, ref_ptr);
+ const uint8x16_t va1 = vec_vsx_ld(16, src_ptr);
+ const uint8x16_t vb1 = vec_vsx_ld(16, ref_ptr);
const int16x8_t a0 = unpack_to_s16_h(va0);
const int16x8_t b0 = unpack_to_s16_h(vb0);
*sum_squared = ss;
}
-static INLINE void variance(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h, uint32_t *sse,
- int *sum) {
+static INLINE void variance(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int w,
+ int h, uint32_t *sse, int *sum) {
int i;
int32x4_t s = vec_splat_s32(0);
switch (w) {
case 4:
for (i = 0; i < h / 2; ++i) {
- const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride));
- const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride));
+ const int16x8_t a0 = unpack_to_s16_h(read4x2(src_ptr, src_stride));
+ const int16x8_t b0 = unpack_to_s16_h(read4x2(ref_ptr, ref_stride));
const int16x8_t d = vec_sub(a0, b0);
s = vec_sum4s(d, s);
ss = vec_msum(d, d, ss);
- a += a_stride * 2;
- b += b_stride * 2;
+ src_ptr += src_stride * 2;
+ ref_ptr += ref_stride * 2;
}
break;
case 8:
for (i = 0; i < h; ++i) {
- const int16x8_t a0 = unpack_to_s16_h(vec_vsx_ld(0, a));
- const int16x8_t b0 = unpack_to_s16_h(vec_vsx_ld(0, b));
+ const int16x8_t a0 = unpack_to_s16_h(vec_vsx_ld(0, src_ptr));
+ const int16x8_t b0 = unpack_to_s16_h(vec_vsx_ld(0, ref_ptr));
const int16x8_t d = vec_sub(a0, b0);
s = vec_sum4s(d, s);
ss = vec_msum(d, d, ss);
- a += a_stride;
- b += b_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
break;
case 16:
for (i = 0; i < h; ++i) {
- const uint8x16_t va = vec_vsx_ld(0, a);
- const uint8x16_t vb = vec_vsx_ld(0, b);
+ const uint8x16_t va = vec_vsx_ld(0, src_ptr);
+ const uint8x16_t vb = vec_vsx_ld(0, ref_ptr);
const int16x8_t a0 = unpack_to_s16_h(va);
const int16x8_t b0 = unpack_to_s16_h(vb);
const int16x8_t a1 = unpack_to_s16_l(va);
s = vec_sum4s(d1, s);
ss = vec_msum(d1, d1, ss);
- a += a_stride;
- b += b_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
break;
case 32:
for (i = 0; i < h; ++i) {
- variance_inner_32(a, b, &ss, &s);
- a += a_stride;
- b += b_stride;
+ variance_inner_32(src_ptr, ref_ptr, &ss, &s);
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
break;
case 64:
for (i = 0; i < h; ++i) {
- variance_inner_32(a, b, &ss, &s);
- variance_inner_32(a + 32, b + 32, &ss, &s);
+ variance_inner_32(src_ptr, ref_ptr, &ss, &s);
+ variance_inner_32(src_ptr + 32, ref_ptr + 32, &ss, &s);
- a += a_stride;
- b += b_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
break;
}
* and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h)
*/
-#define GET_VAR(W, H) \
- void vpx_get##W##x##H##var_vsx(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse, int *sum) { \
- variance(a, a_stride, b, b_stride, W, H, sse, sum); \
+#define GET_VAR(W, H) \
+ void vpx_get##W##x##H##var_vsx(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse, int *sum) { \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \
}
/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / (w * h) and returns sse in addition to modifying the passed in
* variable.
*/
-#define MSE(W, H) \
- uint32_t vpx_mse##W##x##H##_vsx(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse; \
+#define MSE(W, H) \
+ uint32_t vpx_mse##W##x##H##_vsx(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+ return *sse; \
}
-#define VAR(W, H) \
- uint32_t vpx_variance##W##x##H##_vsx(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+#define VAR(W, H) \
+ uint32_t vpx_variance##W##x##H##_vsx(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
}
#define VARIANCES(W, H) VAR(W, H)
{ 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
};
-uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride) {
+uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride) {
int distortion = 0;
int r, c;
for (r = 0; r < 4; ++r) {
for (c = 0; c < 4; ++c) {
- int diff = a[c] - b[c];
+ int diff = src_ptr[c] - ref_ptr[c];
distortion += diff * diff;
}
- a += a_stride;
- b += b_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
return distortion;
}
-uint32_t vpx_get_mb_ss_c(const int16_t *a) {
+uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) {
unsigned int i, sum = 0;
for (i = 0; i < 256; ++i) {
- sum += a[i] * a[i];
+ sum += src_ptr[i] * src_ptr[i];
}
return sum;
}
-static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h, uint32_t *sse, int *sum) {
+static void variance(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int w, int h,
+ uint32_t *sse, int *sum) {
int i, j;
*sum = 0;
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
- const int diff = a[j] - b[j];
+ const int diff = src_ptr[j] - ref_ptr[j];
*sum += diff;
*sse += diff * diff;
}
- a += a_stride;
- b += b_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
}
// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
// It defines the offset required to move from one input to the next.
-static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const uint8_t *filter) {
+static void var_filter_block2d_bil_first_pass(
+ const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
+ int pixel_step, unsigned int output_height, unsigned int output_width,
+ const uint8_t *filter) {
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
- b[j] = ROUND_POWER_OF_TWO(
- (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
+ ref_ptr[j] = ROUND_POWER_OF_TWO(
+ (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+ FILTER_BITS);
- ++a;
+ ++src_ptr;
}
- a += src_pixels_per_line - output_width;
- b += output_width;
+ src_ptr += src_pixels_per_line - output_width;
+ ref_ptr += output_width;
}
}
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one input
// to the next. Output is 8-bit.
-static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const uint8_t *filter) {
+static void var_filter_block2d_bil_second_pass(
+ const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
+ unsigned int pixel_step, unsigned int output_height,
+ unsigned int output_width, const uint8_t *filter) {
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
- b[j] = ROUND_POWER_OF_TWO(
- (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
- ++a;
+ ref_ptr[j] = ROUND_POWER_OF_TWO(
+ (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+ FILTER_BITS);
+ ++src_ptr;
}
- a += src_pixels_per_line - output_width;
- b += output_width;
+ src_ptr += src_pixels_per_line - output_width;
+ ref_ptr += output_width;
}
}
-#define VAR(W, H) \
- uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+#define VAR(W, H) \
+ uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
}
-#define SUBPIX_VAR(W, H) \
- uint32_t vpx_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- \
- var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \
+#define SUBPIX_VAR(W, H) \
+ uint32_t vpx_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ \
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ return vpx_variance##W##x##H##_c(temp2, W, ref_ptr, ref_stride, sse); \
}
-#define SUBPIX_AVG_VAR(W, H) \
- uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
- \
- var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
- bilinear_filters[xoffset]); \
- var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \
- \
- return vpx_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \
+#define SUBPIX_AVG_VAR(W, H) \
+ uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \
+ \
+ return vpx_variance##W##x##H##_c(temp3, W, ref_ptr, ref_stride, sse); \
}
/* Identical to the variance call except it takes an additional parameter, sum,
* and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h)
*/
-#define GET_VAR(W, H) \
- void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- int *sum) { \
- variance(a, a_stride, b, b_stride, W, H, sse, sum); \
+#define GET_VAR(W, H) \
+ void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse, int *sum) { \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \
}
/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / (w * h) and returns sse in addition to modifying the passed in
* variable.
*/
-#define MSE(W, H) \
- uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse; \
+#define MSE(W, H) \
+ uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+ return *sse; \
}
/* All three forms of the variance are available in the same sizes. */
}
#if CONFIG_VP9_HIGHBITDEPTH
-static void highbd_variance64(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, int w, int h,
- uint64_t *sse, int64_t *sum) {
+static void highbd_variance64(const uint8_t *src8_ptr, int src_stride,
+ const uint8_t *ref8_ptr, int ref_stride, int w,
+ int h, uint64_t *sse, int64_t *sum) {
int i, j;
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8_ptr);
+ uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr);
*sum = 0;
*sse = 0;
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
- const int diff = a[j] - b[j];
+ const int diff = src_ptr[j] - ref_ptr[j];
*sum += diff;
*sse += diff * diff;
}
- a += a_stride;
- b += b_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
}
-static void highbd_8_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, int w, int h,
- uint32_t *sse, int *sum) {
+static void highbd_8_variance(const uint8_t *src8_ptr, int src_stride,
+ const uint8_t *ref8_ptr, int ref_stride, int w,
+ int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
int64_t sum_long = 0;
- highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
+ &sum_long);
*sse = (uint32_t)sse_long;
*sum = (int)sum_long;
}
-static void highbd_10_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, int w, int h,
- uint32_t *sse, int *sum) {
+static void highbd_10_variance(const uint8_t *src8_ptr, int src_stride,
+ const uint8_t *ref8_ptr, int ref_stride, int w,
+ int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
int64_t sum_long = 0;
- highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
+ &sum_long);
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
*sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}
-static void highbd_12_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, int w, int h,
- uint32_t *sse, int *sum) {
+static void highbd_12_variance(const uint8_t *src8_ptr, int src_stride,
+ const uint8_t *ref8_ptr, int ref_stride, int w,
+ int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
int64_t sum_long = 0;
- highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
+ &sum_long);
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
*sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}
-#define HIGHBD_VAR(W, H) \
- uint32_t vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
- } \
- \
- uint32_t vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- int64_t var; \
- highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- int64_t var; \
- highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
+#define HIGHBD_VAR(W, H) \
+ uint32_t vpx_highbd_8_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse) { \
+ int sum; \
+ highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
+ &sum); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+ } \
+ \
+ uint32_t vpx_highbd_10_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse) { \
+ int sum; \
+ int64_t var; \
+ highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
+ &sum); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
+ } \
+ \
+ uint32_t vpx_highbd_12_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse) { \
+ int sum; \
+ int64_t var; \
+ highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
+ &sum); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
}
-#define HIGHBD_GET_VAR(S) \
- void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse, int *sum) { \
- highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
- } \
- \
- void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse, int *sum) { \
- highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
- } \
- \
- void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse, int *sum) { \
- highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
+#define HIGHBD_GET_VAR(S) \
+ void vpx_highbd_8_get##S##x##S##var_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse, int *sum) { \
+ highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \
+ sum); \
+ } \
+ \
+ void vpx_highbd_10_get##S##x##S##var_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse, int *sum) { \
+ highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \
+ sum); \
+ } \
+ \
+ void vpx_highbd_12_get##S##x##S##var_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse, int *sum) { \
+ highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \
+ sum); \
}
-#define HIGHBD_MSE(W, H) \
- uint32_t vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse) { \
- int sum; \
- highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
- } \
- \
- uint32_t vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse) { \
- int sum; \
- highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
- } \
- \
- uint32_t vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse) { \
- int sum; \
- highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
+#define HIGHBD_MSE(W, H) \
+ uint32_t vpx_highbd_8_mse##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse) { \
+ int sum; \
+ highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
+ &sum); \
+ return *sse; \
+ } \
+ \
+ uint32_t vpx_highbd_10_mse##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse) { \
+ int sum; \
+ highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
+ &sum); \
+ return *sse; \
+ } \
+ \
+ uint32_t vpx_highbd_12_mse##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
+ int ref_stride, uint32_t *sse) { \
+ int sum; \
+ highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
+ &sum); \
+ return *sse; \
}
static void highbd_var_filter_block2d_bil_first_pass(
}
}
-#define HIGHBD_SUBPIX_VAR(W, H) \
- uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
- dst, dst_stride, sse); \
+#define HIGHBD_SUBPIX_VAR(W, H) \
+ uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ \
+ highbd_var_filter_block2d_bil_first_pass( \
+ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
+ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
+ ref_ptr, ref_stride, sse); \
+ } \
+ \
+ uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ \
+ highbd_var_filter_block2d_bil_first_pass( \
+ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
+ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
+ ref_ptr, ref_stride, sse); \
+ } \
+ \
+ uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ \
+ highbd_var_filter_block2d_bil_first_pass( \
+ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
+ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
+ ref_ptr, ref_stride, sse); \
}
-#define HIGHBD_SUBPIX_AVG_VAR(W, H) \
- uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
- temp2, W); \
- \
- return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
- temp2, W); \
- \
- return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters[yoffset]); \
- \
- vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
- temp2, W); \
- \
- return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- dst, dst_stride, sse); \
+#define HIGHBD_SUBPIX_AVG_VAR(W, H) \
+ uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+ \
+ highbd_var_filter_block2d_bil_first_pass( \
+ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
+ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
+ temp2, W); \
+ \
+ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+ ref_ptr, ref_stride, sse); \
+ } \
+ \
+ uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+ \
+ highbd_var_filter_block2d_bil_first_pass( \
+ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
+ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
+ temp2, W); \
+ \
+ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+ ref_ptr, ref_stride, sse); \
+ } \
+ \
+ uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+ \
+ highbd_var_filter_block2d_bil_first_pass( \
+ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
+ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \
+ temp2, W); \
+ \
+ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+ ref_ptr, ref_stride, sse); \
}
/* All three forms of the variance are available in the same sizes. */
#define FILTER_BITS 7
#define FILTER_WEIGHT 128
-typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *a, int a_stride,
- const uint8_t *b_ptr, int b_stride);
+typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride);
-typedef unsigned int (*vpx_sad_avg_fn_t)(const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
+typedef unsigned int (*vpx_sad_avg_fn_t)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
const uint8_t *second_pred);
-typedef void (*vp8_copy32xn_fn_t)(const uint8_t *a, int a_stride, uint8_t *b,
- int b_stride, int n);
+typedef void (*vp8_copy32xn_fn_t)(const uint8_t *src_ptr, int src_stride,
+ uint8_t *ref_ptr, int ref_stride, int n);
-typedef void (*vpx_sad_multi_fn_t)(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
+typedef void (*vpx_sad_multi_fn_t)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array);
-typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *a, int a_stride,
+typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *src_ptr, int src_stride,
const uint8_t *const b_array[],
- int b_stride, unsigned int *sad_array);
+ int ref_stride, unsigned int *sad_array);
-typedef unsigned int (*vpx_variance_fn_t)(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse);
+typedef unsigned int (*vpx_variance_fn_t)(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride, unsigned int *sse);
-typedef unsigned int (*vpx_subpixvariance_fn_t)(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- unsigned int *sse);
+typedef unsigned int (*vpx_subpixvariance_fn_t)(
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
typedef unsigned int (*vpx_subp_avg_variance_fn_t)(
- const uint8_t *a_ptr, int a_stride, int xoffset, int yoffset,
- const uint8_t *b_ptr, int b_stride, unsigned int *sse,
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
+
#if CONFIG_VP8
typedef struct variance_vtable {
vpx_sad_fn_t sdf;
#
# Variance
#
-add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance8x16 sse2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance8x8 sse2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance8x4 sse2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance4x8 sse2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance4x4 sse2 neon msa mmi vsx/;
#
# Specialty Variance
#
-add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
specialize qw/vpx_get16x16var sse2 avx2 neon msa vsx/;
-add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
specialize qw/vpx_get8x8var sse2 neon msa vsx/;
-add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_mse16x8 sse2 avx2 msa mmi vsx/;
-add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_mse8x16 sse2 msa mmi vsx/;
-add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_mse8x8 sse2 msa mmi vsx/;
add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
specialize qw/vpx_get_mb_ss sse2 msa vsx/;
-add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
+add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride";
specialize qw/vpx_get4x4sse_cs neon msa vsx/;
add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
#
# Subpixel Variance
#
-add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance64x32 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance32x64 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance32x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance16x32 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance16x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance16x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance8x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance8x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance8x4 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance4x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance4x4 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance64x64 neon avx2 msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance64x32 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance32x64 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance32x32 neon avx2 msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance32x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance16x32 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance16x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance16x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance8x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance8x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance8x4 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance4x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance4x4 neon msa mmi sse2 ssse3/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance64x64 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance64x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance32x64 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance32x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance32x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance16x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance16x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance16x8 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance8x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_variance8x8 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance64x64 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance64x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance32x64 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance32x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance32x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance16x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance16x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance16x8 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance8x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_variance8x8 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance64x64 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance64x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance32x64 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance32x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance32x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance16x32 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance16x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance16x8 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance8x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_variance8x8 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_mse16x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_8_mse8x8 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_mse16x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_10_mse8x8 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_mse16x16 sse2/;
- add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_highbd_12_mse8x8 sse2/;
add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint16_t *pred, int width, int height, const uint16_t *ref, int ref_stride";
#
# Subpixel Variance
#
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance64x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance64x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance32x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance32x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance32x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance16x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance16x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance16x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance8x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance8x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_12_sub_pixel_variance8x4 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance64x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance64x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance32x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance32x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance32x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance16x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance16x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance16x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance8x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance8x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_10_sub_pixel_variance8x4 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance64x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance64x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance32x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance32x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance32x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance16x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance16x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance16x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance8x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance8x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_highbd_8_sub_pixel_variance8x4 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4 sse2/;
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4 sse2/;
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4 sse2/;
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
} # CONFIG_VP9_HIGHBITDEPTH
; int vpx_sub_pixel_varianceNxh(const uint8_t *src, ptrdiff_t src_stride,
; int x_offset, int y_offset,
-; const uint8_t *dst, ptrdiff_t dst_stride,
+; const uint8_t *ref, ptrdiff_t ref_stride,
; int height, unsigned int *sse);
;
; This function returns the SE and stores SSE in the given pointer.
-%macro SUM_SSE 6 ; src1, dst1, src2, dst2, sum, sse
+%macro SUM_SSE 6 ; src1, ref1, src2, ref2, sum, sse
psubw %3, %4
psubw %1, %2
mova %4, %3 ; make copies to manipulate to calc sum
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
- %define sec_str sec_strideq
+ ref, ref_stride, \
+ second_pred, second_stride, height, sse
+ %define second_str second_strideq
%else
cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
x_offset, y_offset, \
- dst, dst_stride, height, sse
+ ref, ref_stride, height, sse
%endif
%define block_height heightd
%define bilin_filter sseq
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
+ ref, ref_stride, \
+ second_pred, second_stride, height, sse
%define block_height dword heightm
- %define sec_str sec_stridemp
+ %define second_str second_stridemp
%else
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
x_offset, y_offset, \
- dst, dst_stride, height, sse
+ ref, ref_stride, height, sse
%define block_height heightd
%endif
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
+ ref, ref_stride, \
+ second_pred, second_stride, height, sse
%define block_height dword heightm
- %define sec_str sec_stridemp
+ %define second_str second_stridemp
%else
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
x_offset, y_offset, \
- dst, dst_stride, height, sse
+ ref, ref_stride, height, sse
%define block_height heightd
%endif
sar block_height, 1
%endif
%if %2 == 1 ; avg
- shl sec_str, 1
+ shl second_str, 1
%endif
; FIXME(rbultje) replace by jumptable?
%if %1 == 16
movu m0, [srcq]
movu m2, [srcq + 16]
- mova m1, [dstq]
- mova m3, [dstq + 16]
+ mova m1, [refq]
+ mova m3, [refq + 16]
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m2, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m2, [second_predq+16]
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
+ lea refq, [refq + ref_strideq*2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
movu m2, [srcq + src_strideq*2]
- mova m1, [dstq]
- mova m3, [dstq + dst_strideq*2]
+ mova m1, [refq]
+ mova m3, [refq + ref_strideq*2]
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m2, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m2, [second_predq]
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
+ lea refq, [refq + ref_strideq*4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
movu m1, [srcq+16]
movu m4, [srcq+src_strideq*2]
movu m5, [srcq+src_strideq*2+16]
- mova m2, [dstq]
- mova m3, [dstq+16]
+ mova m2, [refq]
+ mova m3, [refq+16]
pavgw m0, m4
pavgw m1, m5
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m1, [second_predq+16]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
+ lea refq, [refq + ref_strideq*2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
movu m1, [srcq+src_strideq*2]
movu m5, [srcq+src_strideq*4]
- mova m2, [dstq]
- mova m3, [dstq+dst_strideq*2]
+ mova m2, [refq]
+ mova m3, [refq+ref_strideq*2]
pavgw m0, m1
pavgw m1, m5
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m1, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m1, [second_predq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
+ lea refq, [refq + ref_strideq*4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
movu m1, [srcq + 16]
movu m4, [srcq+src_strideq*2]
movu m5, [srcq+src_strideq*2+16]
- mova m2, [dstq]
- mova m3, [dstq+16]
+ mova m2, [refq]
+ mova m3, [refq+16]
; FIXME(rbultje) instead of out=((num-x)*in1+x*in2+rnd)>>log2(num), we can
; also do out=in1+(((num-x)*(in2-in1)+rnd)>>log2(num)). Total number of
; instructions is the same (5), but it is 1 mul instead of 2, so might be
psrlw m1, 4
psrlw m0, 4
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m1, [second_predq+16]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
+ lea refq, [refq + ref_strideq*2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
movu m1, [srcq+src_strideq*2]
movu m5, [srcq+src_strideq*4]
mova m4, m1
- mova m2, [dstq]
- mova m3, [dstq+dst_strideq*2]
+ mova m2, [refq]
+ mova m3, [refq+ref_strideq*2]
pmullw m1, filter_y_a
pmullw m5, filter_y_b
paddw m1, filter_rnd
psrlw m1, 4
psrlw m0, 4
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m1, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m1, [second_predq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
+ lea refq, [refq + ref_strideq*4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
movu m1, [srcq + 16]
movu m4, [srcq + 2]
movu m5, [srcq + 18]
- mova m2, [dstq]
- mova m3, [dstq + 16]
+ mova m2, [refq]
+ mova m3, [refq + 16]
pavgw m0, m4
pavgw m1, m5
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m1, [second_predq+16]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
+ lea refq, [refq + ref_strideq*2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
movu m1, [srcq + src_strideq*2]
movu m4, [srcq + 2]
movu m5, [srcq + src_strideq*2 + 2]
- mova m2, [dstq]
- mova m3, [dstq + dst_strideq*2]
+ mova m2, [refq]
+ mova m3, [refq + ref_strideq*2]
pavgw m0, m4
pavgw m1, m5
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m1, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m1, [second_predq]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
+ lea refq, [refq + ref_strideq*4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
pavgw m3, m5
pavgw m0, m2
pavgw m1, m3
- mova m4, [dstq]
- mova m5, [dstq + 16]
+ mova m4, [refq]
+ mova m5, [refq + 16]
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m1, [second_predq+16]
%endif
SUM_SSE m0, m4, m1, m5, m6, m7
mova m0, m2
mova m1, m3
lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
+ lea refq, [refq + ref_strideq*2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
pavgw m3, m5
pavgw m0, m2
pavgw m2, m3
- mova m4, [dstq]
- mova m5, [dstq + dst_strideq*2]
+ mova m4, [refq]
+ mova m5, [refq + ref_strideq*2]
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m2, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m2, [second_predq]
%endif
SUM_SSE m0, m4, m2, m5, m6, m7
mova m0, m3
lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
+ lea refq, [refq + ref_strideq*4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
paddw m0, filter_rnd
psrlw m1, 4
paddw m0, m2
- mova m2, [dstq]
+ mova m2, [refq]
psrlw m0, 4
- mova m3, [dstq+16]
+ mova m3, [refq+16]
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m1, [second_predq+16]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
mova m0, m4
mova m1, m5
lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
+ lea refq, [refq + ref_strideq*2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
paddw m0, filter_rnd
psrlw m4, 4
paddw m0, m2
- mova m2, [dstq]
+ mova m2, [refq]
psrlw m0, 4
- mova m3, [dstq+dst_strideq*2]
+ mova m3, [refq+ref_strideq*2]
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m4, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m4, [second_predq]
%endif
SUM_SSE m0, m2, m4, m3, m6, m7
mova m0, m5
lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
+ lea refq, [refq + ref_strideq*4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
movu m1, [srcq+16]
movu m2, [srcq+2]
movu m3, [srcq+18]
- mova m4, [dstq]
- mova m5, [dstq+16]
+ mova m4, [refq]
+ mova m5, [refq+16]
pmullw m1, filter_x_a
pmullw m3, filter_x_b
paddw m1, filter_rnd
psrlw m1, 4
psrlw m0, 4
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m1, [second_predq+16]
%endif
SUM_SSE m0, m4, m1, m5, m6, m7
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
movu m1, [srcq+src_strideq*2]
movu m2, [srcq+2]
movu m3, [srcq+src_strideq*2+2]
- mova m4, [dstq]
- mova m5, [dstq+dst_strideq*2]
+ mova m4, [refq]
+ mova m5, [refq+ref_strideq*2]
pmullw m1, filter_x_a
pmullw m3, filter_x_b
paddw m1, filter_rnd
psrlw m1, 4
psrlw m0, 4
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m1, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m1, [second_predq]
%endif
SUM_SSE m0, m4, m1, m5, m6, m7
lea srcq, [srcq+src_strideq*4]
- lea dstq, [dstq+dst_strideq*4]
+ lea refq, [refq+ref_strideq*4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
paddw m3, filter_rnd
paddw m2, m4
paddw m3, m5
- mova m4, [dstq]
- mova m5, [dstq+16]
+ mova m4, [refq]
+ mova m5, [refq+16]
psrlw m2, 4
psrlw m3, 4
pavgw m0, m2
pavgw m1, m3
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m1, [second_predq+16]
%endif
SUM_SSE m0, m4, m1, m5, m6, m7
mova m0, m2
mova m1, m3
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
paddw m3, filter_rnd
paddw m2, m4
paddw m3, m5
- mova m4, [dstq]
- mova m5, [dstq+dst_strideq*2]
+ mova m4, [refq]
+ mova m5, [refq+ref_strideq*2]
psrlw m2, 4
psrlw m3, 4
pavgw m0, m2
pavgw m2, m3
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m2, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m2, [second_predq]
%endif
SUM_SSE m0, m4, m2, m5, m6, m7
mova m0, m3
lea srcq, [srcq+src_strideq*4]
- lea dstq, [dstq+dst_strideq*4]
+ lea refq, [refq+ref_strideq*4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
pmullw m3, filter_y_b
paddw m0, m2
paddw m1, filter_rnd
- mova m2, [dstq]
+ mova m2, [refq]
paddw m1, m3
psrlw m0, 4
psrlw m1, 4
- mova m3, [dstq+16]
+ mova m3, [refq+16]
%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
+ pavgw m0, [second_predq]
+ pavgw m1, [second_predq+16]
%endif
SUM_SSE m0, m2, m1, m3, m6, m7
mova m0, m4
mova m1, m5
INC_SRC_BY_SRC_STRIDE
- lea dstq, [dstq + dst_strideq * 2]
+ lea refq, [refq + ref_strideq * 2]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%else ; %1 < 16
movu m0, [srcq]
pmullw m3, filter_y_b
paddw m0, m2
paddw m4, filter_rnd
- mova m2, [dstq]
+ mova m2, [refq]
paddw m4, m3
psrlw m0, 4
psrlw m4, 4
- mova m3, [dstq+dst_strideq*2]
+ mova m3, [refq+ref_strideq*2]
%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m4, [secq]
+ pavgw m0, [second_predq]
+ add second_predq, second_str
+ pavgw m4, [second_predq]
%endif
SUM_SSE m0, m2, m4, m3, m6, m7
mova m0, m5
INC_SRC_BY_SRC_STRIDE
- lea dstq, [dstq + dst_strideq * 4]
+ lea refq, [refq + ref_strideq * 4]
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
%endif
dec block_height
;unsigned int vpx_highbd_calc16x16var_sse2
;(
; unsigned char * src_ptr,
-; int source_stride,
+; int src_stride,
; unsigned char * ref_ptr,
-; int recon_stride,
+; int ref_stride,
; unsigned int * SSE,
; int * Sum
;)
mov rsi, arg(0) ;[src_ptr]
mov rdi, arg(2) ;[ref_ptr]
- movsxd rax, DWORD PTR arg(1) ;[source_stride]
- movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
+ movsxd rax, DWORD PTR arg(1) ;[src_stride]
+ movsxd rdx, DWORD PTR arg(3) ;[ref_stride]
add rax, rax ; source stride in bytes
add rdx, rdx ; recon stride in bytes
;unsigned int vpx_highbd_calc8x8var_sse2
;(
; unsigned char * src_ptr,
-; int source_stride,
+; int src_stride,
; unsigned char * ref_ptr,
-; int recon_stride,
+; int ref_stride,
; unsigned int * SSE,
; int * Sum
;)
mov rsi, arg(0) ;[src_ptr]
mov rdi, arg(2) ;[ref_ptr]
- movsxd rax, DWORD PTR arg(1) ;[source_stride]
- movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
+ movsxd rax, DWORD PTR arg(1) ;[src_stride]
+ movsxd rdx, DWORD PTR arg(3) ;[ref_stride]
add rax, rax ; source stride in bytes
add rdx, rdx ; recon stride in bytes
#define DECL(w, opt) \
int vpx_highbd_sub_pixel_variance##w##xh_##opt( \
const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
- const uint16_t *dst, ptrdiff_t dst_stride, int height, \
+ const uint16_t *ref, ptrdiff_t ref_stride, int height, \
unsigned int *sse, void *unused0, void *unused);
#define DECLS(opt) \
DECL(8, opt); \
#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt( \
const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \
uint32_t sse; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, h, &sse, NULL, \
+ src, src_stride, x_offset, y_offset, ref, ref_stride, h, &sse, NULL, \
NULL); \
if (w > wf) { \
unsigned int sse2; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \
+ src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, h, \
&sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \
+ src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, h, \
&sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \
+ src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, h, \
&sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
\
uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \
int64_t var; \
uint32_t sse; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, h, &sse, NULL, \
+ src, src_stride, x_offset, y_offset, ref, ref_stride, h, &sse, NULL, \
NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \
+ src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, h, \
&sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \
+ src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, h, \
&sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \
+ src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, h, \
&sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
\
uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \
int start_row; \
uint32_t sse; \
int se = 0; \
int64_t var; \
uint64_t long_sse = 0; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
for (start_row = 0; start_row < h; start_row += 16) { \
uint32_t sse2; \
int height = h - start_row < 16 ? h - start_row : 16; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + (start_row * src_stride), src_stride, x_offset, y_offset, \
- dst + (start_row * dst_stride), dst_stride, height, &sse2, NULL, \
+ ref + (start_row * ref_stride), ref_stride, height, &sse2, NULL, \
NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 16 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 16 + (start_row * dst_stride), dst_stride, height, \
+ y_offset, ref + 16 + (start_row * ref_stride), ref_stride, height, \
&sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 32 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 32 + (start_row * dst_stride), dst_stride, \
+ y_offset, ref + 32 + (start_row * ref_stride), ref_stride, \
height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 48 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 48 + (start_row * dst_stride), dst_stride, \
+ y_offset, ref + 48 + (start_row * ref_stride), ref_stride, \
height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
#define DECL(w, opt) \
int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt( \
const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
- const uint16_t *dst, ptrdiff_t dst_stride, const uint16_t *sec, \
- ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \
+ const uint16_t *ref, ptrdiff_t ref_stride, const uint16_t *second, \
+ ptrdiff_t second_stride, int height, unsigned int *sse, void *unused0, \
void *unused);
#define DECLS(opt1) \
DECL(16, opt1) \
#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \
const uint8_t *sec8) { \
uint32_t sse; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \
+ src, src_stride, x_offset, y_offset, ref, ref_stride, sec, w, h, &sse, \
NULL, NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, \
+ src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, \
sec + 16, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, \
+ src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, \
sec + 32, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, \
+ src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, \
sec + 48, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
\
uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \
const uint8_t *sec8) { \
int64_t var; \
uint32_t sse; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \
+ src, src_stride, x_offset, y_offset, ref, ref_stride, sec, w, h, &sse, \
NULL, NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, \
+ src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, \
sec + 16, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, \
+ src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, \
sec + 32, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, \
+ src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, \
sec + 48, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
\
uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \
const uint8_t *sec8) { \
int start_row; \
int64_t var; \
int se = 0; \
uint64_t long_sse = 0; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
for (start_row = 0; start_row < h; start_row += 16) { \
uint32_t sse2; \
int height = h - start_row < 16 ? h - start_row : 16; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + (start_row * src_stride), src_stride, x_offset, y_offset, \
- dst + (start_row * dst_stride), dst_stride, sec + (start_row * w), \
+ ref + (start_row * ref_stride), ref_stride, sec + (start_row * w), \
w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf) { \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 16 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 16 + (start_row * dst_stride), dst_stride, \
+ y_offset, ref + 16 + (start_row * ref_stride), ref_stride, \
sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 32 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 32 + (start_row * dst_stride), dst_stride, \
+ y_offset, ref + 32 + (start_row * ref_stride), ref_stride, \
sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 48 + (start_row * dst_stride), dst_stride, \
+ y_offset, ref + 48 + (start_row * ref_stride), ref_stride, \
sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
; int vpx_sub_pixel_varianceNxh(const uint8_t *src, ptrdiff_t src_stride,
; int x_offset, int y_offset,
-; const uint8_t *dst, ptrdiff_t dst_stride,
+; const uint8_t *ref, ptrdiff_t ref_stride,
; int height, unsigned int *sse);
;
; This function returns the SE and stores SSE in the given pointer.
-%macro SUM_SSE 6 ; src1, dst1, src2, dst2, sum, sse
+%macro SUM_SSE 6 ; src1, ref1, src2, ref2, sum, sse
psubw %3, %4
psubw %1, %2
paddw %5, %3
%if ARCH_X86_64
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
- sec, sec_stride, height, sse
- %define sec_str sec_strideq
+ x_offset, y_offset, ref, ref_stride, \
+ second_pred, second_stride, height, sse
+ %define second_str second_strideq
%else
cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
+ x_offset, y_offset, ref, ref_stride, \
height, sse
%endif
%define block_height heightd
%if CONFIG_PIC=1
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
- sec, sec_stride, height, sse
+ x_offset, y_offset, ref, ref_stride, \
+ second_pred, second_stride, height, sse
%define block_height dword heightm
- %define sec_str sec_stridemp
+ %define second_str second_stridemp
%else
cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
+ x_offset, y_offset, ref, ref_stride, \
height, sse
%define block_height heightd
%endif
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
x_offset, y_offset, \
- dst, dst_stride, sec, sec_stride, \
+ ref, ref_stride, second_pred, second_stride, \
height, sse
%define block_height dword heightm
- %define sec_str sec_stridemp
+ %define second_str second_stridemp
%else
cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
+ x_offset, y_offset, ref, ref_stride, \
height, sse
%define block_height heightd
%endif
%if %1 < 16
sar block_height, 1
%if %2 == 1 ; avg
- shl sec_str, 1
+ shl second_str, 1
%endif
%endif
.x_zero_y_zero_loop:
%if %1 == 16
movu m0, [srcq]
- mova m1, [dstq]
+ mova m1, [refq]
%if %2 == 1 ; avg
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m3, m1, m5
punpcklbw m1, m5
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
add srcq, src_strideq
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
%if %2 == 1 ; avg
movx m2, [srcq+src_strideq]
%endif
- movx m1, [dstq]
- movx m3, [dstq+dst_strideq]
+ movx m1, [refq]
+ movx m3, [refq+ref_strideq]
%if %2 == 1 ; avg
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
%else
- movh m2, [secq]
+ movh m2, [second_predq]
pavgb m0, m2
%endif
punpcklbw m3, m5
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_zero_y_zero_loop
%if %1 == 16
movu m0, [srcq]
movu m4, [srcq+src_strideq]
- mova m1, [dstq]
+ mova m1, [refq]
pavgb m0, m4
punpckhbw m3, m1, m5
%if %2 == 1 ; avg
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
%endif
punpcklbw m1, m5
punpckhbw m2, m0, m5
SUM_SSE m0, m1, m2, m3, m6, m7
add srcq, src_strideq
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
movx m2, [srcq+src_strideq]
movx m1, [srcq+src_strideq*2]
punpckldq m2, m1
%endif
- movx m1, [dstq]
+ movx m1, [refq]
%if %1 > 4
movlhps m0, m2
%else ; 4xh
punpckldq m0, m2
%endif
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
pavgb m0, m2
punpcklbw m1, m5
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpcklbw m3, m5
punpckhbw m2, m0, m5
punpcklbw m0, m5
%else ; 4xh
- movh m4, [secq]
+ movh m4, [second_predq]
pavgb m0, m4
punpcklbw m3, m5
punpcklbw m0, m5
%endif
%else ; !avg
movx m4, [srcq+src_strideq*2]
- movx m1, [dstq]
+ movx m1, [refq]
pavgb m0, m2
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
pavgb m2, m4
punpcklbw m0, m5
punpcklbw m2, m5
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_zero_y_half_loop
%if %1 == 16
movu m0, [srcq]
movu m4, [srcq+src_strideq]
- mova m1, [dstq]
+ mova m1, [refq]
%if cpuflag(ssse3)
punpckhbw m2, m0, m4
punpcklbw m0, m4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
packuswb m0, m2
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
add srcq, src_strideq
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
movx m2, [srcq+src_strideq]
movx m4, [srcq+src_strideq*2]
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
%if cpuflag(ssse3)
- movx m1, [dstq]
+ movx m1, [refq]
punpcklbw m0, m2
punpcklbw m2, m4
pmaddubsw m0, filter_y_a
pmullw m4, filter_y_b
paddw m0, m1
paddw m2, filter_rnd
- movx m1, [dstq]
+ movx m1, [refq]
paddw m2, m4
%endif
psraw m0, 4
%endif
packuswb m0, m2
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%else ; 4xh
- movh m2, [secq]
+ movh m2, [second_predq]
pavgb m0, m2
punpcklbw m0, m5
movhlps m2, m0
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_zero_y_other_loop
%if %1 == 16
movu m0, [srcq]
movu m4, [srcq+1]
- mova m1, [dstq]
+ mova m1, [refq]
pavgb m0, m4
punpckhbw m3, m1, m5
%if %2 == 1 ; avg
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
%endif
punpcklbw m1, m5
punpckhbw m2, m0, m5
SUM_SSE m0, m1, m2, m3, m6, m7
add srcq, src_strideq
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
movx m4, [srcq+1]
movx m2, [srcq+src_strideq+1]
punpckldq m4, m2
%endif
- movx m1, [dstq]
- movx m3, [dstq+dst_strideq]
+ movx m1, [refq]
+ movx m3, [refq+ref_strideq]
pavgb m0, m4
punpcklbw m3, m5
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpcklbw m1, m5
punpckhbw m2, m0, m5
punpcklbw m0, m5
%else ; 4xh
- movh m2, [secq]
+ movh m2, [second_predq]
pavgb m0, m2
punpcklbw m1, m5
punpcklbw m0, m5
%endif
%else ; !avg
movx m2, [srcq+src_strideq]
- movx m1, [dstq]
+ movx m1, [refq]
pavgb m0, m4
movx m4, [srcq+src_strideq+1]
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
pavgb m2, m4
punpcklbw m0, m5
punpcklbw m2, m5
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_half_y_zero_loop
.x_half_y_half_loop:
movu m4, [srcq]
movu m3, [srcq+1]
- mova m1, [dstq]
+ mova m1, [refq]
pavgb m4, m3
punpckhbw m3, m1, m5
pavgb m0, m4
%if %2 == 1 ; avg
punpcklbw m1, m5
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%else
mova m0, m4
add srcq, src_strideq
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
movx m3, [srcq+1]
punpckldq m0, m2
pshuflw m4, m2, 0xe
%endif
- movx m1, [dstq]
+ movx m1, [refq]
pavgb m0, m2
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
%else
- movh m2, [secq]
+ movh m2, [second_predq]
pavgb m0, m2
%endif
punpcklbw m3, m5
pavgb m4, m1
pavgb m0, m2
pavgb m2, m4
- movx m1, [dstq]
- movx m3, [dstq+dst_strideq]
+ movx m1, [refq]
+ movx m3, [refq+ref_strideq]
punpcklbw m0, m5
punpcklbw m2, m5
punpcklbw m3, m5
mova m0, m4
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_half_y_half_loop
.x_half_y_other_loop:
movu m4, [srcq]
movu m2, [srcq+1]
- mova m1, [dstq]
+ mova m1, [refq]
pavgb m4, m2
%if cpuflag(ssse3)
punpckhbw m2, m0, m4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
packuswb m0, m2
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%endif
mova m0, m4
add srcq, src_strideq
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
movx m3, [srcq+1]
movx m3, [srcq+src_strideq+1]
pavgb m2, m1
pavgb m4, m3
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
%if cpuflag(ssse3)
- movx m1, [dstq]
+ movx m1, [refq]
punpcklbw m0, m2
punpcklbw m2, m4
pmaddubsw m0, filter_y_a
pmullw m1, m4, filter_y_b
paddw m2, filter_rnd
paddw m2, m1
- movx m1, [dstq]
+ movx m1, [refq]
%endif
psraw m0, 4
psraw m2, 4
%endif
packuswb m0, m2
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%else
- movh m2, [secq]
+ movh m2, [second_predq]
pavgb m0, m2
punpcklbw m0, m5
movhlps m2, m0
mova m0, m4
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_half_y_other_loop
%if %1 == 16
movu m0, [srcq]
movu m4, [srcq+1]
- mova m1, [dstq]
+ mova m1, [refq]
%if cpuflag(ssse3)
punpckhbw m2, m0, m4
punpcklbw m0, m4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
packuswb m0, m2
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
add srcq, src_strideq
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
movx m1, [srcq+1]
movx m2, [srcq+src_strideq]
movx m4, [srcq+src_strideq+1]
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
%if cpuflag(ssse3)
punpcklbw m0, m1
- movx m1, [dstq]
+ movx m1, [refq]
punpcklbw m2, m4
pmaddubsw m0, filter_x_a
pmaddubsw m2, filter_x_a
pmullw m4, filter_x_b
paddw m0, m1
paddw m2, filter_rnd
- movx m1, [dstq]
+ movx m1, [refq]
paddw m2, m4
%endif
psraw m0, 4
%endif
packuswb m0, m2
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%else
- movh m2, [secq]
+ movh m2, [second_predq]
pavgb m0, m2
punpcklbw m0, m5
movhlps m2, m0
SUM_SSE m0, m1, m2, m3, m6, m7
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_other_y_zero_loop
movu m4, [srcq]
movu m3, [srcq+1]
%if cpuflag(ssse3)
- mova m1, [dstq]
+ mova m1, [refq]
punpckhbw m2, m4, m3
punpcklbw m4, m3
pmaddubsw m2, filter_x_a
paddw m2, filter_rnd
paddw m4, m3
paddw m2, m1
- mova m1, [dstq]
+ mova m1, [refq]
psraw m4, 4
psraw m2, 4
punpckhbw m3, m1, m5
%endif
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
%endif
punpckhbw m2, m0, m5
punpcklbw m0, m5
mova m0, m4
add srcq, src_strideq
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
movx m1, [srcq+1]
punpcklbw m4, m3
pmaddubsw m2, filter_x_a
pmaddubsw m4, filter_x_a
- movx m1, [dstq]
- movx m3, [dstq+dst_strideq]
+ movx m1, [refq]
+ movx m3, [refq+ref_strideq]
paddw m2, filter_rnd
paddw m4, filter_rnd
%else
pmullw m3, filter_x_b
paddw m4, filter_rnd
paddw m2, m1
- movx m1, [dstq]
+ movx m1, [refq]
paddw m4, m3
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
%endif
psraw m2, 4
psraw m4, 4
%endif
packuswb m0, m2
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%else
- movh m2, [secq]
+ movh m2, [second_predq]
pavgb m0, m2
punpcklbw m0, m5
movhlps m2, m0
mova m0, m4
lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_other_y_half_loop
%if cpuflag(ssse3)
movu m4, [srcq]
movu m3, [srcq+1]
- mova m1, [dstq]
+ mova m1, [refq]
punpckhbw m2, m4, m3
punpcklbw m4, m3
pmaddubsw m2, filter_x_a
pmullw m0, filter_y_a
pmullw m3, filter_y_b
paddw m2, m1
- mova m1, [dstq]
+ mova m1, [refq]
paddw m0, filter_rnd
psraw m2, 4
paddw m0, m3
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
packuswb m0, m2
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%endif
mova m0, m4
INC_SRC_BY_SRC_STRIDE
- add dstq, dst_strideq
+ add refq, ref_strideq
%else ; %1 < 16
movx m0, [srcq]
movx m1, [srcq+1]
punpcklbw m4, m3
pmaddubsw m2, filter_x_a
pmaddubsw m4, filter_x_a
- movx m3, [dstq+dst_strideq]
- movx m1, [dstq]
+ movx m3, [refq+ref_strideq]
+ movx m1, [refq]
paddw m2, filter_rnd
paddw m4, filter_rnd
psraw m2, 4
pmullw m1, m4, filter_y_b
paddw m2, filter_rnd
paddw m0, m3
- movx m3, [dstq+dst_strideq]
+ movx m3, [refq+ref_strideq]
paddw m2, m1
- movx m1, [dstq]
+ movx m1, [refq]
psraw m0, 4
psraw m2, 4
punpcklbw m3, m5
%endif
packuswb m0, m2
%if %1 > 4
- pavgb m0, [secq]
+ pavgb m0, [second_predq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
%else
- movh m2, [secq]
+ movh m2, [second_predq]
pavgb m0, m2
punpcklbw m0, m5
movhlps m2, m0
mova m0, m4
INC_SRC_BY_SRC_STRIDE
- lea dstq, [dstq+dst_strideq*2]
+ lea refq, [refq+ref_strideq*2]
%endif
%if %2 == 1 ; avg
- add secq, sec_str
+ add second_predq, second_str
%endif
dec block_height
jg .x_other_y_other_loop
}
}
// Patch hunk: the pointer parameters src/ref are renamed to src_ptr/ref_ptr;
// apart from that rename the statements are unchanged.
//
// vpx_get16x16var_avx2 has variance16_avx2 fill the vector registers
// vsse/vsum (16 is passed as the argument after ref_stride -- presumably the
// number of rows; confirm against variance16_avx2), then hands both to
// variance_final_from_16bit_sum_avx2 together with the caller's sse and sum
// output pointers.
-void vpx_get16x16var_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride, unsigned int *sse,
- int *sum) {
+void vpx_get16x16var_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
__m256i vsse, vsum;
- variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum);
variance_final_from_16bit_sum_avx2(vsse, vsum, sse, sum);
}
static INLINE void spv32_x0_y0(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg) {
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg) {
const __m256i zero_reg = _mm256_setzero_si256();
__m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi;
int i;
const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst);
const __m256i src_reg = _mm256_loadu_si256((__m256i const *)src);
if (do_sec) {
- const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec);
+ const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred);
const __m256i avg_reg = _mm256_avg_epu8(src_reg, sec_reg);
exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg);
- sec += sec_stride;
+ second_pred += second_stride;
} else {
exp_src_lo = _mm256_unpacklo_epi8(src_reg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(src_reg, zero_reg);
// (x == 0, y == 4) or (x == 4, y == 0). sstep determines the direction.
static INLINE void spv32_half_zero(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride,
- int do_sec, int height, __m256i *sum_reg,
- __m256i *sse_reg, int sstep) {
+ const uint8_t *second_pred,
+ int second_stride, int do_sec, int height,
+ __m256i *sum_reg, __m256i *sse_reg,
+ int sstep) {
const __m256i zero_reg = _mm256_setzero_si256();
__m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi;
int i;
const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + sstep));
const __m256i src_avg = _mm256_avg_epu8(src_0, src_1);
if (do_sec) {
- const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec);
+ const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred);
const __m256i avg_reg = _mm256_avg_epu8(src_avg, sec_reg);
exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg);
- sec += sec_stride;
+ second_pred += second_stride;
} else {
exp_src_lo = _mm256_unpacklo_epi8(src_avg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(src_avg, zero_reg);
// Patch hunk: sec/sec_stride are renamed to second_pred/second_stride; the
// forwarded arguments are otherwise the same.
//
// Thin wrapper for the x_offset == 0, y_offset == 4 case: forwards everything
// to spv32_half_zero and passes src_stride as the final (sstep) argument, so
// the helper's second source load (src + sstep) comes from one row further
// down.
static INLINE void spv32_x0_y4(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg) {
- spv32_half_zero(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, sum_reg, sse_reg, src_stride);
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg) {
+ spv32_half_zero(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, sum_reg, sse_reg, src_stride);
}
// Patch hunk: sec/sec_stride are renamed to second_pred/second_stride; the
// forwarded arguments are otherwise the same.
//
// Thin wrapper for the x_offset == 4, y_offset == 0 case: forwards everything
// to spv32_half_zero and passes 1 as the final (sstep) argument, so the
// helper's second source load (src + sstep) comes from the next byte in the
// same row.
static INLINE void spv32_x4_y0(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg) {
- spv32_half_zero(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, sum_reg, sse_reg, 1);
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg) {
+ spv32_half_zero(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, sum_reg, sse_reg, 1);
}
static INLINE void spv32_x4_y4(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg) {
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg) {
const __m256i zero_reg = _mm256_setzero_si256();
const __m256i src_a = _mm256_loadu_si256((__m256i const *)src);
const __m256i src_b = _mm256_loadu_si256((__m256i const *)(src + 1));
prev_src_avg = src_avg;
if (do_sec) {
- const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec);
+ const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred);
const __m256i avg_reg = _mm256_avg_epu8(current_avg, sec_reg);
exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg);
- sec += sec_stride;
+ second_pred += second_stride;
} else {
exp_src_lo = _mm256_unpacklo_epi8(current_avg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(current_avg, zero_reg);
// (x == 0, y == bil) or (x == bil, y == 0). sstep determines the direction.
static INLINE void spv32_bilin_zero(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride,
- int do_sec, int height, __m256i *sum_reg,
- __m256i *sse_reg, int offset, int sstep) {
+ const uint8_t *second_pred,
+ int second_stride, int do_sec, int height,
+ __m256i *sum_reg, __m256i *sse_reg,
+ int offset, int sstep) {
const __m256i zero_reg = _mm256_setzero_si256();
const __m256i pw8 = _mm256_set1_epi16(8);
const __m256i filter = _mm256_load_si256(
FILTER_SRC(filter)
if (do_sec) {
- const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec);
+ const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred);
const __m256i exp_src = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
const __m256i avg_reg = _mm256_avg_epu8(exp_src, sec_reg);
- sec += sec_stride;
+ second_pred += second_stride;
exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg);
}
// Patch hunk: sec/sec_stride are renamed to second_pred/second_stride; the
// forwarded arguments are otherwise the same.
//
// Thin wrapper for x_offset == 0 with a bilinear y_offset: forwards to
// spv32_bilin_zero, passing y_offset as the helper's `offset` argument and
// src_stride as its `sstep` argument.
static INLINE void spv32_x0_yb(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg,
- int y_offset) {
- spv32_bilin_zero(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, sum_reg, sse_reg, y_offset, src_stride);
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg, int y_offset) {
+ spv32_bilin_zero(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, sum_reg, sse_reg, y_offset, src_stride);
}
// Patch hunk: sec/sec_stride are renamed to second_pred/second_stride; the
// forwarded arguments are otherwise the same.
//
// Thin wrapper for a bilinear x_offset with y_offset == 0: forwards to
// spv32_bilin_zero, passing x_offset as the helper's `offset` argument and 1
// as its `sstep` argument.
static INLINE void spv32_xb_y0(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg,
- int x_offset) {
- spv32_bilin_zero(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, sum_reg, sse_reg, x_offset, 1);
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg, int x_offset) {
+ spv32_bilin_zero(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, sum_reg, sse_reg, x_offset, 1);
}
static INLINE void spv32_x4_yb(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg,
- int y_offset) {
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg, int y_offset) {
const __m256i zero_reg = _mm256_setzero_si256();
const __m256i pw8 = _mm256_set1_epi16(8);
const __m256i filter = _mm256_load_si256(
FILTER_SRC(filter)
if (do_sec) {
- const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec);
+ const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred);
const __m256i exp_src_avg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
const __m256i avg_reg = _mm256_avg_epu8(exp_src_avg, sec_reg);
exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg);
- sec += sec_stride;
+ second_pred += second_stride;
}
CALC_SUM_SSE_INSIDE_LOOP
dst += dst_stride;
static INLINE void spv32_xb_y4(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg,
- int x_offset) {
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg, int x_offset) {
const __m256i zero_reg = _mm256_setzero_si256();
const __m256i pw8 = _mm256_set1_epi16(8);
const __m256i filter = _mm256_load_si256(
src_pack = _mm256_avg_epu8(src_pack, src_reg);
if (do_sec) {
- const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec);
+ const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred);
const __m256i avg_pack = _mm256_avg_epu8(src_pack, sec_reg);
exp_src_lo = _mm256_unpacklo_epi8(avg_pack, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(avg_pack, zero_reg);
- sec += sec_stride;
+ second_pred += second_stride;
} else {
exp_src_lo = _mm256_unpacklo_epi8(src_pack, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(src_pack, zero_reg);
static INLINE void spv32_xb_yb(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride, int do_sec,
- int height, __m256i *sum_reg, __m256i *sse_reg,
- int x_offset, int y_offset) {
+ const uint8_t *second_pred, int second_stride,
+ int do_sec, int height, __m256i *sum_reg,
+ __m256i *sse_reg, int x_offset, int y_offset) {
const __m256i zero_reg = _mm256_setzero_si256();
const __m256i pw8 = _mm256_set1_epi16(8);
const __m256i xfilter = _mm256_load_si256(
FILTER_SRC(yfilter)
if (do_sec) {
- const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec);
+ const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred);
const __m256i exp_src = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
const __m256i avg_reg = _mm256_avg_epu8(exp_src, sec_reg);
exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg);
exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg);
- sec += sec_stride;
+ second_pred += second_stride;
}
prev_src_pack = src_pack;
static INLINE int sub_pix_var32xh(const uint8_t *src, int src_stride,
int x_offset, int y_offset,
const uint8_t *dst, int dst_stride,
- const uint8_t *sec, int sec_stride,
+ const uint8_t *second_pred, int second_stride,
int do_sec, int height, unsigned int *sse) {
const __m256i zero_reg = _mm256_setzero_si256();
__m256i sum_reg = _mm256_setzero_si256();
// x_offset = 0 and y_offset = 0
if (x_offset == 0) {
if (y_offset == 0) {
- spv32_x0_y0(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg);
+ spv32_x0_y0(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg);
// x_offset = 0 and y_offset = 4
} else if (y_offset == 4) {
- spv32_x0_y4(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg);
+ spv32_x0_y4(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg);
// x_offset = 0 and y_offset = bilin interpolation
} else {
- spv32_x0_yb(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg, y_offset);
+ spv32_x0_yb(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg, y_offset);
}
// x_offset = 4 and y_offset = 0
} else if (x_offset == 4) {
if (y_offset == 0) {
- spv32_x4_y0(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg);
+ spv32_x4_y0(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg);
// x_offset = 4 and y_offset = 4
} else if (y_offset == 4) {
- spv32_x4_y4(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg);
+ spv32_x4_y4(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg);
// x_offset = 4 and y_offset = bilin interpolation
} else {
- spv32_x4_yb(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg, y_offset);
+ spv32_x4_yb(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg, y_offset);
}
// x_offset = bilin interpolation and y_offset = 0
} else {
if (y_offset == 0) {
- spv32_xb_y0(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg, x_offset);
+ spv32_xb_y0(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg, x_offset);
// x_offset = bilin interpolation and y_offset = 4
} else if (y_offset == 4) {
- spv32_xb_y4(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg, x_offset);
+ spv32_xb_y4(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg, x_offset);
// x_offset = bilin interpolation and y_offset = bilin interpolation
} else {
- spv32_xb_yb(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec,
- height, &sum_reg, &sse_reg, x_offset, y_offset);
+ spv32_xb_yb(src, src_stride, dst, dst_stride, second_pred, second_stride,
+ do_sec, height, &sum_reg, &sse_reg, x_offset, y_offset);
}
}
CALC_SUM_AND_SSE
// Patch hunk: sec/sec_stride are renamed to second_pred/second_stride in both
// the parameter list and the forwarded call.
//
// Averaging entry point: delegates to sub_pix_var32xh with the do_sec
// argument hard-wired to 1, and returns that helper's result unchanged.
static unsigned int sub_pixel_avg_variance32xh_avx2(
const uint8_t *src, int src_stride, int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride, const uint8_t *sec, int sec_stride,
- int height, unsigned int *sse) {
+ const uint8_t *dst, int dst_stride, const uint8_t *second_pred,
+ int second_stride, int height, unsigned int *sse) {
return sub_pix_var32xh(src, src_stride, x_offset, y_offset, dst, dst_stride,
- sec, sec_stride, 1, height, sse);
+ second_pred, second_stride, 1, height, sse);
}
// Patch hunk: only the parameter names in the prototype change
// (src/ref -> src_ptr/ref_ptr); the types are the same on both sides.
//
// Function-pointer type for routines that take a source pointer/stride and a
// reference pointer/stride and report through the sse and sum out-pointers,
// returning nothing.
-typedef void (*get_var_avx2)(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+typedef void (*get_var_avx2)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse, int *sum);
// Patch hunk: src/ref are renamed to src_ptr/ref_ptr; nothing else changes.
//
// Has variance16_avx2 fill vsse/vsum (8 is passed after ref_stride --
// presumably the row count), lets variance_final_from_16bit_sum_avx2 write
// *sse and the local sum, and returns *sse - (sum * sum >> 7).  The shift by
// 7 divides by 128, i.e. 16 * 8 as in the function name.  The int64_t cast
// makes the square be computed in 64 bits before it is narrowed to uint32_t.
-unsigned int vpx_variance16x8_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance16x8_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum;
__m256i vsse, vsum;
- variance16_avx2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum);
variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 7);
}
// Patch hunk: src/ref are renamed to src_ptr/ref_ptr; nothing else changes.
//
// Has variance16_avx2 fill vsse/vsum (16 is passed after ref_stride --
// presumably the row count), lets variance_final_from_16bit_sum_avx2 write
// *sse and the local sum, and returns *sse - (sum * sum >> 8).  The shift by
// 8 divides by 256, i.e. 16 * 16 as in the function name.  The int64_t cast
// makes the square be computed in 64 bits before it is narrowed to uint32_t.
-unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum;
__m256i vsse, vsum;
- variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum);
variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 8);
}
// Patch hunk: src/ref are renamed to src_ptr/ref_ptr; nothing else changes.
//
// Has variance16_avx2 fill vsse/vsum (32 is passed after ref_stride --
// presumably the row count), lets variance_final_from_16bit_sum_avx2 write
// *sse and the local sum, and returns *sse - (sum * sum >> 9).  The shift by
// 9 divides by 512, i.e. 16 * 32 as in the function name.  The int64_t cast
// makes the square be computed in 64 bits before it is narrowed to uint32_t.
-unsigned int vpx_variance16x32_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance16x32_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum;
__m256i vsse, vsum;
- variance16_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum);
variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 9);
}
-unsigned int vpx_variance32x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum;
__m256i vsse, vsum;
- variance32_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum);
variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 9);
}
-unsigned int vpx_variance32x32_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum;
__m256i vsse, vsum;
__m128i vsum_128;
- variance32_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum);
vsum_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum),
_mm256_extractf128_si256(vsum, 1));
vsum_128 = _mm_add_epi32(_mm_cvtepi16_epi32(vsum_128),
return *sse - (uint32_t)(((int64_t)sum * sum) >> 10);
}
-unsigned int vpx_variance32x64_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance32x64_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum;
__m256i vsse, vsum;
__m128i vsum_128;
- variance32_avx2(src, src_stride, ref, ref_stride, 64, &vsse, &vsum);
+ variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 64, &vsse, &vsum);
vsum = sum_to_32bit_avx2(vsum);
vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum),
_mm256_extractf128_si256(vsum, 1));
return *sse - (uint32_t)(((int64_t)sum * sum) >> 11);
}
-unsigned int vpx_variance64x32_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m256i vsse = _mm256_setzero_si256();
__m256i vsum = _mm256_setzero_si256();
__m128i vsum_128;
int sum;
- variance64_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ variance64_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum);
vsum = sum_to_32bit_avx2(vsum);
vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum),
_mm256_extractf128_si256(vsum, 1));
return *sse - (uint32_t)(((int64_t)sum * sum) >> 11);
}
-unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m256i vsse = _mm256_setzero_si256();
__m256i vsum = _mm256_setzero_si256();
for (i = 0; i < 2; i++) {
__m256i vsum16;
- variance64_avx2(src + 32 * i * src_stride, src_stride,
- ref + 32 * i * ref_stride, ref_stride, 32, &vsse, &vsum16);
+ variance64_avx2(src_ptr + 32 * i * src_stride, src_stride,
+ ref_ptr + 32 * i * ref_stride, ref_stride, 32, &vsse,
+ &vsum16);
vsum = _mm256_add_epi32(vsum, sum_to_32bit_avx2(vsum16));
}
vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum),
return *sse - (unsigned int)(((int64_t)sum * sum) >> 12);
}
-unsigned int vpx_mse16x8_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_mse16x8_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum;
__m256i vsse, vsum;
- variance16_avx2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum);
variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
return *sse;
}
-unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
int sum;
__m256i vsse, vsum;
- variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum);
variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
return *sse;
}
-unsigned int vpx_sub_pixel_variance64x64_avx2(const uint8_t *src,
- int src_stride, int x_offset,
- int y_offset, const uint8_t *dst,
- int dst_stride,
- unsigned int *sse) {
+unsigned int vpx_sub_pixel_variance64x64_avx2(
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) {
unsigned int sse1;
const int se1 = sub_pixel_variance32xh_avx2(
- src, src_stride, x_offset, y_offset, dst, dst_stride, 64, &sse1);
+ src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, 64, &sse1);
unsigned int sse2;
const int se2 =
- sub_pixel_variance32xh_avx2(src + 32, src_stride, x_offset, y_offset,
- dst + 32, dst_stride, 64, &sse2);
+ sub_pixel_variance32xh_avx2(src_ptr + 32, src_stride, x_offset, y_offset,
+ ref_ptr + 32, ref_stride, 64, &sse2);
const int se = se1 + se2;
*sse = sse1 + sse2;
return *sse - (uint32_t)(((int64_t)se * se) >> 12);
}
-unsigned int vpx_sub_pixel_variance32x32_avx2(const uint8_t *src,
- int src_stride, int x_offset,
- int y_offset, const uint8_t *dst,
- int dst_stride,
- unsigned int *sse) {
+unsigned int vpx_sub_pixel_variance32x32_avx2(
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) {
const int se = sub_pixel_variance32xh_avx2(
- src, src_stride, x_offset, y_offset, dst, dst_stride, 32, sse);
+ src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, 32, sse);
return *sse - (uint32_t)(((int64_t)se * se) >> 10);
}
unsigned int vpx_sub_pixel_avg_variance64x64_avx2(
- const uint8_t *src, int src_stride, int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride, unsigned int *sse, const uint8_t *sec) {
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
+ const uint8_t *second_pred) {
unsigned int sse1;
- const int se1 = sub_pixel_avg_variance32xh_avx2(
- src, src_stride, x_offset, y_offset, dst, dst_stride, sec, 64, 64, &sse1);
+ const int se1 = sub_pixel_avg_variance32xh_avx2(src_ptr, src_stride, x_offset,
+ y_offset, ref_ptr, ref_stride,
+ second_pred, 64, 64, &sse1);
unsigned int sse2;
const int se2 = sub_pixel_avg_variance32xh_avx2(
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, sec + 32,
- 64, 64, &sse2);
+ src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, ref_stride,
+ second_pred + 32, 64, 64, &sse2);
const int se = se1 + se2;
*sse = sse1 + sse2;
}
unsigned int vpx_sub_pixel_avg_variance32x32_avx2(
- const uint8_t *src, int src_stride, int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride, unsigned int *sse, const uint8_t *sec) {
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
+ const uint8_t *second_pred) {
// Process 32 elements in parallel.
- const int se = sub_pixel_avg_variance32xh_avx2(
- src, src_stride, x_offset, y_offset, dst, dst_stride, sec, 32, 32, sse);
+ const int se = sub_pixel_avg_variance32xh_avx2(src_ptr, src_stride, x_offset,
+ y_offset, ref_ptr, ref_stride,
+ second_pred, 32, 32, sse);
return *sse - (uint32_t)(((int64_t)se * se) >> 10);
}
return _mm_cvtsi128_si32(val);
}
-unsigned int vpx_get_mb_ss_sse2(const int16_t *src) {
+unsigned int vpx_get_mb_ss_sse2(const int16_t *src_ptr) {
__m128i vsum = _mm_setzero_si128();
int i;
for (i = 0; i < 32; ++i) {
- const __m128i v = _mm_loadu_si128((const __m128i *)src);
+ const __m128i v = _mm_loadu_si128((const __m128i *)src_ptr);
vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v));
- src += 8;
+ src_ptr += 8;
}
return add32x4_sse2(vsum);
return _mm_unpacklo_epi8(p01, _mm_setzero_si128());
}
-static INLINE void variance_kernel_sse2(const __m128i src, const __m128i ref,
+static INLINE void variance_kernel_sse2(const __m128i src_ptr,
+ const __m128i ref_ptr,
__m128i *const sse,
__m128i *const sum) {
- const __m128i diff = _mm_sub_epi16(src, ref);
+ const __m128i diff = _mm_sub_epi16(src_ptr, ref_ptr);
*sse = _mm_add_epi32(*sse, _mm_madd_epi16(diff, diff));
*sum = _mm_add_epi16(*sum, diff);
}
return add32x4_sse2(t);
}
-static INLINE void variance4_sse2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
+static INLINE void variance4_sse2(const uint8_t *src_ptr, const int src_stride,
+ const uint8_t *ref_ptr, const int ref_stride,
const int h, __m128i *const sse,
__m128i *const sum) {
int i;
*sum = _mm_setzero_si128();
for (i = 0; i < h; i += 2) {
- const __m128i s = load4x2_sse2(src, src_stride);
- const __m128i r = load4x2_sse2(ref, ref_stride);
+ const __m128i s = load4x2_sse2(src_ptr, src_stride);
+ const __m128i r = load4x2_sse2(ref_ptr, ref_stride);
variance_kernel_sse2(s, r, sse, sum);
- src += 2 * src_stride;
- ref += 2 * ref_stride;
+ src_ptr += 2 * src_stride;
+ ref_ptr += 2 * ref_stride;
}
}
-static INLINE void variance8_sse2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
+static INLINE void variance8_sse2(const uint8_t *src_ptr, const int src_stride,
+ const uint8_t *ref_ptr, const int ref_stride,
const int h, __m128i *const sse,
__m128i *const sum) {
const __m128i zero = _mm_setzero_si128();
for (i = 0; i < h; i++) {
const __m128i s =
- _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)src), zero);
+ _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)src_ptr), zero);
const __m128i r =
- _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)ref), zero);
+ _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)ref_ptr), zero);
variance_kernel_sse2(s, r, sse, sum);
- src += src_stride;
- ref += ref_stride;
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
}
-static INLINE void variance16_kernel_sse2(const uint8_t *const src,
- const uint8_t *const ref,
+static INLINE void variance16_kernel_sse2(const uint8_t *const src_ptr,
+ const uint8_t *const ref_ptr,
__m128i *const sse,
__m128i *const sum) {
const __m128i zero = _mm_setzero_si128();
- const __m128i s = _mm_loadu_si128((const __m128i *)src);
- const __m128i r = _mm_loadu_si128((const __m128i *)ref);
+ const __m128i s = _mm_loadu_si128((const __m128i *)src_ptr);
+ const __m128i r = _mm_loadu_si128((const __m128i *)ref_ptr);
const __m128i src0 = _mm_unpacklo_epi8(s, zero);
const __m128i ref0 = _mm_unpacklo_epi8(r, zero);
const __m128i src1 = _mm_unpackhi_epi8(s, zero);
variance_kernel_sse2(src1, ref1, sse, sum);
}
-static INLINE void variance16_sse2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
+static INLINE void variance16_sse2(const uint8_t *src_ptr, const int src_stride,
+ const uint8_t *ref_ptr, const int ref_stride,
const int h, __m128i *const sse,
__m128i *const sum) {
int i;
*sum = _mm_setzero_si128();
for (i = 0; i < h; ++i) {
- variance16_kernel_sse2(src, ref, sse, sum);
- src += src_stride;
- ref += ref_stride;
+ variance16_kernel_sse2(src_ptr, ref_ptr, sse, sum);
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
}
-static INLINE void variance32_sse2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
+static INLINE void variance32_sse2(const uint8_t *src_ptr, const int src_stride,
+ const uint8_t *ref_ptr, const int ref_stride,
const int h, __m128i *const sse,
__m128i *const sum) {
int i;
*sum = _mm_setzero_si128();
for (i = 0; i < h; ++i) {
- variance16_kernel_sse2(src + 0, ref + 0, sse, sum);
- variance16_kernel_sse2(src + 16, ref + 16, sse, sum);
- src += src_stride;
- ref += ref_stride;
+ variance16_kernel_sse2(src_ptr + 0, ref_ptr + 0, sse, sum);
+ variance16_kernel_sse2(src_ptr + 16, ref_ptr + 16, sse, sum);
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
}
-static INLINE void variance64_sse2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
+static INLINE void variance64_sse2(const uint8_t *src_ptr, const int src_stride,
+ const uint8_t *ref_ptr, const int ref_stride,
const int h, __m128i *const sse,
__m128i *const sum) {
int i;
*sum = _mm_setzero_si128();
for (i = 0; i < h; ++i) {
- variance16_kernel_sse2(src + 0, ref + 0, sse, sum);
- variance16_kernel_sse2(src + 16, ref + 16, sse, sum);
- variance16_kernel_sse2(src + 32, ref + 32, sse, sum);
- variance16_kernel_sse2(src + 48, ref + 48, sse, sum);
- src += src_stride;
- ref += ref_stride;
+ variance16_kernel_sse2(src_ptr + 0, ref_ptr + 0, sse, sum);
+ variance16_kernel_sse2(src_ptr + 16, ref_ptr + 16, sse, sum);
+ variance16_kernel_sse2(src_ptr + 32, ref_ptr + 32, sse, sum);
+ variance16_kernel_sse2(src_ptr + 48, ref_ptr + 48, sse, sum);
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
}
}
-void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, const uint8_t *ref,
- int ref_stride, unsigned int *sse, int *sum) {
+void vpx_get8x8var_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
__m128i vsse, vsum;
- variance8_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum);
variance_final_128_pel_sse2(vsse, vsum, sse, sum);
}
-void vpx_get16x16var_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride, unsigned int *sse,
- int *sum) {
+void vpx_get16x16var_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sse, int *sum) {
__m128i vsse, vsum;
- variance16_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum);
variance_final_256_pel_sse2(vsse, vsum, sse, sum);
}
-unsigned int vpx_variance4x4_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse, vsum;
int sum;
- variance4_sse2(src, src_stride, ref, ref_stride, 4, &vsse, &vsum);
+ variance4_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 4, &vsse, &vsum);
variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 4);
}
-unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse, vsum;
int sum;
- variance4_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance4_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum);
variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 5);
}
-unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse, vsum;
int sum;
- variance8_sse2(src, src_stride, ref, ref_stride, 4, &vsse, &vsum);
+ variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 4, &vsse, &vsum);
variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 5);
}
-unsigned int vpx_variance8x8_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse, vsum;
int sum;
- variance8_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum);
variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 6);
}
-unsigned int vpx_variance8x16_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse, vsum;
int sum;
- variance8_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum);
variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 7);
}
-unsigned int vpx_variance16x8_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse, vsum;
int sum;
- variance16_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum);
variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 7);
}
-unsigned int vpx_variance16x16_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse, vsum;
int sum;
- variance16_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum);
variance_final_256_pel_sse2(vsse, vsum, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 8);
}
-unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse, vsum;
int sum;
- variance16_sse2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum);
variance_final_512_pel_sse2(vsse, vsum, sse, &sum);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 9);
}
-unsigned int vpx_variance32x16_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse = _mm_setzero_si128();
__m128i vsum;
int sum;
- variance32_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance32_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum);
variance_final_512_pel_sse2(vsse, vsum, sse, &sum);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 9);
}
-unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse = _mm_setzero_si128();
__m128i vsum;
int sum;
- variance32_sse2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ variance32_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum);
*sse = add32x4_sse2(vsse);
sum = sum_final_sse2(vsum);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
}
-unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse = _mm_setzero_si128();
__m128i vsum = _mm_setzero_si128();
for (i = 0; i < 2; i++) {
__m128i vsum16;
- variance32_sse2(src + 32 * i * src_stride, src_stride,
- ref + 32 * i * ref_stride, ref_stride, 32, &vsse, &vsum16);
+ variance32_sse2(src_ptr + 32 * i * src_stride, src_stride,
+ ref_ptr + 32 * i * ref_stride, ref_stride, 32, &vsse,
+ &vsum16);
vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16));
}
*sse = add32x4_sse2(vsse);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 11);
}
-unsigned int vpx_variance64x32_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse = _mm_setzero_si128();
__m128i vsum = _mm_setzero_si128();
for (i = 0; i < 2; i++) {
__m128i vsum16;
- variance64_sse2(src + 16 * i * src_stride, src_stride,
- ref + 16 * i * ref_stride, ref_stride, 16, &vsse, &vsum16);
+ variance64_sse2(src_ptr + 16 * i * src_stride, src_stride,
+ ref_ptr + 16 * i * ref_stride, ref_stride, 16, &vsse,
+ &vsum16);
vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16));
}
*sse = add32x4_sse2(vsse);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 11);
}
-unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
__m128i vsse = _mm_setzero_si128();
__m128i vsum = _mm_setzero_si128();
for (i = 0; i < 4; i++) {
__m128i vsum16;
- variance64_sse2(src + 16 * i * src_stride, src_stride,
- ref + 16 * i * ref_stride, ref_stride, 16, &vsse, &vsum16);
+ variance64_sse2(src_ptr + 16 * i * src_stride, src_stride,
+ ref_ptr + 16 * i * ref_stride, ref_stride, 16, &vsse,
+ &vsum16);
vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16));
}
*sse = add32x4_sse2(vsse);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 12);
}
-unsigned int vpx_mse8x8_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
- vpx_variance8x8_sse2(src, src_stride, ref, ref_stride, sse);
+ vpx_variance8x8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse);
return *sse;
}
-unsigned int vpx_mse8x16_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
- vpx_variance8x16_sse2(src, src_stride, ref, ref_stride, sse);
+ vpx_variance8x16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse);
return *sse;
}
-unsigned int vpx_mse16x8_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
- vpx_variance16x8_sse2(src, src_stride, ref, ref_stride, sse);
+ vpx_variance16x8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse);
return *sse;
}
-unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
+unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) {
- vpx_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);
+ vpx_variance16x16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse);
return *sse;
}
// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in subpel_variance.asm
-#define DECL(w, opt) \
- int vpx_sub_pixel_variance##w##xh_##opt( \
- const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, ptrdiff_t dst_stride, int height, unsigned int *sse, \
- void *unused0, void *unused)
+#define DECL(w, opt) \
+ int vpx_sub_pixel_variance##w##xh_##opt( \
+ const uint8_t *src_ptr, ptrdiff_t src_stride, int x_offset, \
+ int y_offset, const uint8_t *ref_ptr, ptrdiff_t ref_stride, int height, \
+ unsigned int *sse, void *unused0, void *unused)
#define DECLS(opt1, opt2) \
DECL(4, opt1); \
DECL(8, opt1); \
#undef DECLS
#undef DECL
-#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
- unsigned int vpx_sub_pixel_variance##w##x##h##_##opt( \
- const uint8_t *src, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, int dst_stride, unsigned int *sse_ptr) { \
- unsigned int sse; \
- int se = vpx_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \
- y_offset, dst, dst_stride, \
- h, &sse, NULL, NULL); \
- if (w > wf) { \
- unsigned int sse2; \
- int se2 = vpx_sub_pixel_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- if (w > wf * 2) { \
- se2 = vpx_sub_pixel_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- se2 = vpx_sub_pixel_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- } \
- } \
- *sse_ptr = sse; \
- return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
+ unsigned int vpx_sub_pixel_variance##w##x##h##_##opt( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { \
+ unsigned int sse_tmp; \
+ int se = vpx_sub_pixel_variance##wf##xh_##opt( \
+ src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, h, \
+ &sse_tmp, NULL, NULL); \
+ if (w > wf) { \
+ unsigned int sse2; \
+ int se2 = vpx_sub_pixel_variance##wf##xh_##opt( \
+ src_ptr + 16, src_stride, x_offset, y_offset, ref_ptr + 16, \
+ ref_stride, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse_tmp += sse2; \
+ if (w > wf * 2) { \
+ se2 = vpx_sub_pixel_variance##wf##xh_##opt( \
+ src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, \
+ ref_stride, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse_tmp += sse2; \
+ se2 = vpx_sub_pixel_variance##wf##xh_##opt( \
+ src_ptr + 48, src_stride, x_offset, y_offset, ref_ptr + 48, \
+ ref_stride, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse_tmp += sse2; \
+ } \
+ } \
+ *sse = sse_tmp; \
+ return sse_tmp - \
+ (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \
}
#define FNS(opt1, opt2) \
#undef FN
// The 2 unused parameters are place holders for PIC enabled build.
-#define DECL(w, opt) \
- int vpx_sub_pixel_avg_variance##w##xh_##opt( \
- const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *sec, \
- ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \
- void *unused)
+#define DECL(w, opt) \
+ int vpx_sub_pixel_avg_variance##w##xh_##opt( \
+ const uint8_t *src_ptr, ptrdiff_t src_stride, int x_offset, \
+ int y_offset, const uint8_t *ref_ptr, ptrdiff_t ref_stride, \
+ const uint8_t *second_pred, ptrdiff_t second_stride, int height, \
+ unsigned int *sse, void *unused0, void *unused)
#define DECLS(opt1, opt2) \
DECL(4, opt1); \
DECL(8, opt1); \
#undef DECL
#undef DECLS
-#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
- unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt( \
- const uint8_t *src, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, int dst_stride, unsigned int *sseptr, \
- const uint8_t *sec) { \
- unsigned int sse; \
- int se = vpx_sub_pixel_avg_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \
- NULL, NULL); \
- if (w > wf) { \
- unsigned int sse2; \
- int se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, \
- sec + 16, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- if (w > wf * 2) { \
- se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, \
- sec + 32, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, \
- sec + 48, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- } \
- } \
- *sseptr = sse; \
- return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
+ unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, \
+ const uint8_t *second_pred) { \
+ unsigned int sse_tmp; \
+ int se = vpx_sub_pixel_avg_variance##wf##xh_##opt( \
+ src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, \
+ second_pred, w, h, &sse_tmp, NULL, NULL); \
+ if (w > wf) { \
+ unsigned int sse2; \
+ int se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \
+ src_ptr + 16, src_stride, x_offset, y_offset, ref_ptr + 16, \
+ ref_stride, second_pred + 16, w, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse_tmp += sse2; \
+ if (w > wf * 2) { \
+ se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \
+ src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, \
+ ref_stride, second_pred + 32, w, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse_tmp += sse2; \
+ se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \
+ src_ptr + 48, src_stride, x_offset, y_offset, ref_ptr + 48, \
+ ref_stride, second_pred + 48, w, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse_tmp += sse2; \
+ } \
+ } \
+ *sse = sse_tmp; \
+ return sse_tmp - \
+ (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \
}
#define FNS(opt1, opt2) \