#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/blend_mask.h"
#include "vpx_dsp/x86/synonyms.h"
#include "./vpx_dsp_rtcd.h"
-#define MASK_BITS 6
-
//////////////////////////////////////////////////////////////////////////////
// Common kernels
//////////////////////////////////////////////////////////////////////////////
const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
- const __m128i v_res_w = xx_roundn_epu16(v_sum_w, MASK_BITS);
+ const __m128i v_res_w = xx_roundn_epu16(v_sum_w, MASK_BITS6);
return v_res_w;
}
const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
- const __m128i v_res_w = xx_roundn_epu16(v_sum_w, MASK_BITS);
+ const __m128i v_res_w = xx_roundn_epu16(v_sum_w, MASK_BITS6);
return v_res_w;
}
// No sub-sampling
//////////////////////////////////////////////////////////////////////////////
-static void blend_mask6_w4_sse4_1(
+static void blend_mask6b_w4_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
(void)w;
} while (--h);
}
-static void blend_mask6_w8_sse4_1(
+static void blend_mask6b_w8_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
(void)w;
} while (--h);
}
-static void blend_mask6_w16n_sse4_1(
+static void blend_mask6b_w16n_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
int c;
// Horizontal sub-sampling
//////////////////////////////////////////////////////////////////////////////
-static void blend_mask6_sx_w4_sse4_1(
+static void blend_mask6b_sx_w4_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
int h, int w) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
(void)w;
} while (--h);
}
-static void blend_mask6_sx_w8_sse4_1(
+static void blend_mask6b_sx_w8_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
int h, int w) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
(void)w;
} while (--h);
}
-static void blend_mask6_sx_w16n_sse4_1(
+static void blend_mask6b_sx_w16n_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
int h, int w) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
int c;
// Vertical sub-sampling
//////////////////////////////////////////////////////////////////////////////
-static void blend_mask6_sy_w4_sse4_1(
+static void blend_mask6b_sy_w4_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
(void)w;
} while (--h);
}
-static void blend_mask6_sy_w8_sse4_1(
+static void blend_mask6b_sy_w8_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
(void)w;
} while (--h);
}
-static void blend_mask6_sy_w16n_sse4_1(
+static void blend_mask6b_sy_w16n_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
const __m128i v_zero = _mm_setzero_si128();
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
int c;
// Horizontal and Vertical sub-sampling
//////////////////////////////////////////////////////////////////////////////
-static void blend_mask6_sx_sy_w4_sse4_1(
+static void blend_mask6b_sx_sy_w4_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
int h, int w) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
(void)w;
} while (--h);
}
-static void blend_mask6_sx_sy_w8_sse4_1(
+static void blend_mask6b_sx_sy_w8_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
int h, int w) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
(void)w;
} while (--h);
}
-static void blend_mask6_sx_sy_w16n_sse4_1(
+static void blend_mask6b_sx_sy_w16n_sse4_1(
uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
int h, int w) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
int c;
// Dispatch
//////////////////////////////////////////////////////////////////////////////
-void vpx_blend_mask6_sse4_1(uint8_t *dst, uint32_t dst_stride,
+void vpx_blend_mask6b_sse4_1(uint8_t *dst, uint32_t dst_stride,
uint8_t *src0, uint32_t src0_stride,
uint8_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
static blend_fn blend[3][2][2] = { // width_index X subx X suby
{ // w % 16 == 0
- {blend_mask6_w16n_sse4_1, blend_mask6_sy_w16n_sse4_1},
- {blend_mask6_sx_w16n_sse4_1, blend_mask6_sx_sy_w16n_sse4_1}
+ {blend_mask6b_w16n_sse4_1, blend_mask6b_sy_w16n_sse4_1},
+ {blend_mask6b_sx_w16n_sse4_1, blend_mask6b_sx_sy_w16n_sse4_1}
}, { // w == 4
- {blend_mask6_w4_sse4_1, blend_mask6_sy_w4_sse4_1},
- {blend_mask6_sx_w4_sse4_1, blend_mask6_sx_sy_w4_sse4_1}
+ {blend_mask6b_w4_sse4_1, blend_mask6b_sy_w4_sse4_1},
+ {blend_mask6b_sx_w4_sse4_1, blend_mask6b_sx_sy_w4_sse4_1}
}, { // w == 8
- {blend_mask6_w8_sse4_1, blend_mask6_sy_w8_sse4_1},
- {blend_mask6_sx_w8_sse4_1, blend_mask6_sx_sy_w8_sse4_1}
+ {blend_mask6b_w8_sse4_1, blend_mask6b_sy_w8_sse4_1},
+ {blend_mask6b_sx_w8_sse4_1, blend_mask6b_sx_sy_w8_sse4_1}
}
};
const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
- const __m128i v_res_w = xx_roundn_epu16(v_sum_w, MASK_BITS);
+ const __m128i v_res_w = xx_roundn_epu16(v_sum_w, MASK_BITS6);
return v_res_w;
}
const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
- const __m128i v_res_w = xx_roundn_epu16(v_sum_w, MASK_BITS);
+ const __m128i v_res_w = xx_roundn_epu16(v_sum_w, MASK_BITS6);
return v_res_w;
}
const __m128i v_sum_d = _mm_madd_epi16(v_s01_w, v_m01_w);
// Scale
- const __m128i v_ssum_d = _mm_srli_epi32(v_sum_d, MASK_BITS - 1);
+ const __m128i v_ssum_d = _mm_srli_epi32(v_sum_d, MASK_BITS6 - 1);
// Pack
const __m128i v_pssum_d = _mm_packs_epi32(v_ssum_d, v_ssum_d);
const __m128i v_sumh_d = _mm_madd_epi16(v_s01h_w, v_m01h_w);
// Scale
- const __m128i v_ssuml_d = _mm_srli_epi32(v_suml_d, MASK_BITS - 1);
- const __m128i v_ssumh_d = _mm_srli_epi32(v_sumh_d, MASK_BITS - 1);
+ const __m128i v_ssuml_d = _mm_srli_epi32(v_suml_d, MASK_BITS6 - 1);
+ const __m128i v_ssumh_d = _mm_srli_epi32(v_sumh_d, MASK_BITS6 - 1);
// Pack
const __m128i v_pssum_d = _mm_packs_epi32(v_ssuml_d, v_ssumh_d);
// No sub-sampling
//////////////////////////////////////////////////////////////////////////////
-static INLINE void blend_mask6_bn_w4_sse4_1(
+static INLINE void blend_mask6b_bn_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
const __m128i v_m0_b = xx_loadl_32(mask);
} while (--h);
}
-static void blend_mask6_b10_w4_sse4_1(
+static void blend_mask6b_b10_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
(void)w;
- blend_mask6_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b10);
+ blend_mask6b_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b10);
}
-static void blend_mask6_b12_w4_sse4_1(
+static void blend_mask6b_b12_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
(void)w;
- blend_mask6_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b12);
+ blend_mask6b_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b12);
}
-static inline void blend_mask6_bn_w8n_sse4_1(
+static INLINE void blend_mask6b_bn_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w, blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
int c;
} while (--h);
}
-static void blend_mask6_b10_w8n_sse4_1(
+static void blend_mask6b_b10_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- blend_mask6_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, w,
- blend_8_b10);
+ blend_mask6b_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b10);
}
-static void blend_mask6_b12_w8n_sse4_1(
+static void blend_mask6b_b12_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- blend_mask6_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, w,
- blend_8_b12);
+ blend_mask6b_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b12);
}
//////////////////////////////////////////////////////////////////////////////
// Horizontal sub-sampling
//////////////////////////////////////////////////////////////////////////////
-static INLINE void blend_mask6_bn_sx_w4_sse4_1(
+static INLINE void blend_mask6b_bn_sx_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
int h, blend_unit_fn blend) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
const __m128i v_r_b = xx_loadl_64(mask);
} while (--h);
}
-static void blend_mask6_b10_sx_w4_sse4_1(
+static void blend_mask6b_b10_sx_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
(void)w;
- blend_mask6_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b10);
+ blend_mask6b_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b10);
}
-static void blend_mask6_b12_sx_w4_sse4_1(
+static void blend_mask6b_b12_sx_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
(void)w;
- blend_mask6_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b12);
+ blend_mask6b_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b12);
}
-static INLINE void blend_mask6_bn_sx_w8n_sse4_1(
+static INLINE void blend_mask6b_bn_sx_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
int h, int w, blend_unit_fn blend) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
int c;
} while (--h);
}
-static void blend_mask6_b10_sx_w8n_sse4_1(
+static void blend_mask6b_b10_sx_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- blend_mask6_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, w,
- blend_8_b10);
+ blend_mask6b_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b10);
}
-static void blend_mask6_b12_sx_w8n_sse4_1(
+static void blend_mask6b_b12_sx_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- blend_mask6_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, w,
- blend_8_b12);
+ blend_mask6b_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b12);
}
//////////////////////////////////////////////////////////////////////////////
// Vertical sub-sampling
//////////////////////////////////////////////////////////////////////////////
-static INLINE void blend_mask6_bn_sy_w4_sse4_1(
+static INLINE void blend_mask6b_bn_sy_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
const __m128i v_ra_b = xx_loadl_32(mask);
} while (--h);
}
-static void blend_mask6_b10_sy_w4_sse4_1(
+static void blend_mask6b_b10_sy_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
(void)w;
- blend_mask6_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b10);
+ blend_mask6b_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b10);
}
-static void blend_mask6_b12_sy_w4_sse4_1(
+static void blend_mask6b_b12_sy_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
(void)w;
- blend_mask6_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b12);
+ blend_mask6b_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b12);
}
-static INLINE void blend_mask6_bn_sy_w8n_sse4_1(
+static INLINE void blend_mask6b_bn_sy_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w, blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
int c;
} while (--h);
}
-static void blend_mask6_b10_sy_w8n_sse4_1(
+static void blend_mask6b_b10_sy_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- blend_mask6_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, w,
- blend_8_b10);
+ blend_mask6b_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b10);
}
-static void blend_mask6_b12_sy_w8n_sse4_1(
+static void blend_mask6b_b12_sy_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- blend_mask6_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, w,
- blend_8_b12);
+ blend_mask6b_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b12);
}
//////////////////////////////////////////////////////////////////////////////
// Horizontal and Vertical sub-sampling
//////////////////////////////////////////////////////////////////////////////
-static INLINE void blend_mask6_bn_sx_sy_w4_sse4_1(
+static INLINE void blend_mask6b_bn_sx_sy_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
int h, blend_unit_fn blend) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
const __m128i v_ra_b = xx_loadl_64(mask);
} while (--h);
}
-static void blend_mask6_b10_sx_sy_w4_sse4_1(
+static void blend_mask6b_b10_sx_sy_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
(void)w;
- blend_mask6_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b10);
+ blend_mask6b_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b10);
}
-static void blend_mask6_b12_sx_sy_w4_sse4_1(
+static void blend_mask6b_b12_sx_sy_w4_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
(void)w;
- blend_mask6_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b12);
+ blend_mask6b_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b12);
}
-static INLINE void blend_mask6_bn_sx_sy_w8n_sse4_1(
+static INLINE void blend_mask6b_bn_sx_sy_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
int h, int w, blend_unit_fn blend) {
const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS6);
do {
int c;
} while (--h);
}
-static void blend_mask6_b10_sx_sy_w8n_sse4_1(
+static void blend_mask6b_b10_sx_sy_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- blend_mask6_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, w,
- blend_8_b10);
+ blend_mask6b_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b10);
}
-static void blend_mask6_b12_sx_sy_w8n_sse4_1(
+static void blend_mask6b_b12_sx_sy_w8n_sse4_1(
uint16_t *dst, uint32_t dst_stride,
uint16_t *src0, uint32_t src0_stride,
uint16_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride,
int h, int w) {
- blend_mask6_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, w,
- blend_8_b12);
+ blend_mask6b_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b12);
}
//////////////////////////////////////////////////////////////////////////////
// Dispatch
//////////////////////////////////////////////////////////////////////////////
-void vpx_highbd_blend_mask6_sse4_1(uint8_t *dst_8, uint32_t dst_stride,
- uint8_t *src0_8, uint32_t src0_stride,
- uint8_t *src1_8, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride,
- int h, int w, int suby, int subx, int bd) {
+void vpx_highbd_blend_mask6b_sse4_1(uint8_t *dst_8, uint32_t dst_stride,
+ uint8_t *src0_8, uint32_t src0_stride,
+ uint8_t *src1_8, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, int suby, int subx, int bd) {
uint16_t *const dst = CONVERT_TO_SHORTPTR(dst_8);
uint16_t *const src0 = CONVERT_TO_SHORTPTR(src0_8);
uint16_t *const src1 = CONVERT_TO_SHORTPTR(src1_8);
static blend_fn blend[2][2][2][2] = { // bd_index X width_index X subx X suby
{ // bd == 8 or 10
{ // w % 8 == 0
- {blend_mask6_b10_w8n_sse4_1, blend_mask6_b10_sy_w8n_sse4_1},
- {blend_mask6_b10_sx_w8n_sse4_1, blend_mask6_b10_sx_sy_w8n_sse4_1}
+ {blend_mask6b_b10_w8n_sse4_1, blend_mask6b_b10_sy_w8n_sse4_1},
+ {blend_mask6b_b10_sx_w8n_sse4_1, blend_mask6b_b10_sx_sy_w8n_sse4_1}
}, { // w == 4
- {blend_mask6_b10_w4_sse4_1, blend_mask6_b10_sy_w4_sse4_1},
- {blend_mask6_b10_sx_w4_sse4_1, blend_mask6_b10_sx_sy_w4_sse4_1}
+ {blend_mask6b_b10_w4_sse4_1, blend_mask6b_b10_sy_w4_sse4_1},
+ {blend_mask6b_b10_sx_w4_sse4_1, blend_mask6b_b10_sx_sy_w4_sse4_1}
}
},
{ // bd == 12
{ // w % 8 == 0
- {blend_mask6_b12_w8n_sse4_1, blend_mask6_b12_sy_w8n_sse4_1},
- {blend_mask6_b12_sx_w8n_sse4_1, blend_mask6_b12_sx_sy_w8n_sse4_1}
+ {blend_mask6b_b12_w8n_sse4_1, blend_mask6b_b12_sy_w8n_sse4_1},
+ {blend_mask6b_b12_sx_w8n_sse4_1, blend_mask6b_b12_sx_sy_w8n_sse4_1}
}, { // w == 4
- {blend_mask6_b12_w4_sse4_1, blend_mask6_b12_sy_w4_sse4_1},
- {blend_mask6_b12_sx_w4_sse4_1, blend_mask6_b12_sx_sy_w4_sse4_1}
+ {blend_mask6b_b12_w4_sse4_1, blend_mask6b_b12_sy_w4_sse4_1},
+ {blend_mask6b_b12_sx_w4_sse4_1, blend_mask6b_b12_sx_sy_w4_sse4_1}
}
}
};