INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 8, 1),
+ make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_4_sse2>,
+ &wrapper_nc<vpx_highbd_lpf_horizontal_4_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_4_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_16_c>, 8, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 10, 1),
+ make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_4_sse2>,
+ &wrapper_nc<vpx_highbd_lpf_horizontal_4_c>, 10, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_4_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 10, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 10, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_16_c>, 10, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 12, 1),
+ make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_4_sse2>,
+ &wrapper_nc<vpx_highbd_lpf_horizontal_4_c>, 12, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_4_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 12, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>,
} else {
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
} else if (mask_4x4_int & 2) {
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
} else if (mask_4x4 & 1) {
} else {
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
} else if (mask_4x4_int & 2) {
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
} else {
vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
} else if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
}
}
s += 8 * count;
} else {
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
} else if (mask_4x4_int & 2) {
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
} else if (mask_4x4 & 1) {
} else {
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
} else if (mask_4x4_int & 2) {
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
} else {
vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
} else if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
}
}
s += 8 * count;
void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh, int count, int bd) {
+ const uint8_t *thresh, int bd) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint16_t p3 = s[-4 * p];
const uint16_t p2 = s[-3 * p];
const uint16_t p1 = s[-2 * p];
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd);
}
void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
- int count, int bd) {
+ int bd) {
const __m128i zero = _mm_set1_epi16(0);
__m128i blimit, limit, thresh;
__m128i mask, hev, flat;
__m128i work_a;
__m128i filter1, filter2;
- (void)count;
-
if (bd == 8) {
blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
- bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
}
static INLINE void highbd_transpose(uint16_t *src[], int in_p,
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
- bd);
+ vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd);
src[0] = t_dst;
dst[0] = s - 4;