From: Yaowu Xu
Date: Thu, 14 Jul 2016 17:57:35 +0000 (-0700)
Subject: Merge branch 'master' into nextgenv2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6fe07a207b7ab81742c834a6418ed22b1f73c59c;p=libvpx

Merge branch 'master' into nextgenv2

Change-Id: Ia3c0f2103fd997613d9f16156795028f89f63265
---

6fe07a207b7ab81742c834a6418ed22b1f73c59c
diff --cc test/convolve_test.cc
index 120d475b4,73b0edb99..21f185a93
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@@ -137,8 -117,9 +137,9 @@@ void filter_block2d_8_c(const uint8_t *
   //                              = 23
   // and filter_max_width = 16
   //
-  uint8_t intermediate_buffer[71 * kMaxDimension];
+  uint8_t intermediate_buffer[(kMaxDimension+8) * kMaxDimension];
-  const int intermediate_next_stride = 1 - intermediate_height * output_width;
+  const int intermediate_next_stride =
+      1 - static_cast<int>(intermediate_height * output_width);
 
   // Horizontal pass (src -> transposed intermediate).
   uint8_t *output_ptr = intermediate_buffer;
@@@ -249,8 -230,9 +250,9 @@@ void highbd_filter_block2d_8_c(const ui
    *                              = 23
    * and filter_max_width = 16
    */
-  uint16_t intermediate_buffer[71 * kMaxDimension];
+  uint16_t intermediate_buffer[(kMaxDimension+8) * kMaxDimension];
-  const int intermediate_next_stride = 1 - intermediate_height * output_width;
+  const int intermediate_next_stride =
+      1 - static_cast<int>(intermediate_height * output_width);
 
   // Horizontal pass (src -> transposed intermediate).
   {
diff --cc test/fdct4x4_test.cc
index 59ce89585,735cccf8d..f6b65676e
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@@ -296,28 -481,18 +296,18 @@@ INSTANTIATE_TEST_CASE_P
 INSTANTIATE_TEST_CASE_P(
     NEON, Trans4x4HT,
     ::testing::Values(
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8, 16),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8, 16),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8, 16),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8, 16)));
 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 
- #if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
-     !CONFIG_EMULATE_HARDWARE
- INSTANTIATE_TEST_CASE_P(
-     MMX, Trans4x4WHT,
-     ::testing::Values(
-         make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0,
-                    VPX_BITS_8, 16)));
- #endif
- 
- #if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
-     !CONFIG_EMULATE_HARDWARE
+ #if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans4x4WHT,
     ::testing::Values(
-         make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0,
-                    VPX_BITS_8, 16)));
-        make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
++        make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8, 16),
++        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8, 16)));
 #endif
 
 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
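Note on the test/convolve_test.cc hunks above: intermediate_height and output_width are unsigned, so without a cast the expression 1 - intermediate_height * output_width is evaluated entirely in unsigned arithmetic and wraps around to a huge positive value instead of the intended negative back-step; the static_cast<int> keeps the subtraction signed. A minimal standalone sketch of the pitfall (illustrative values, not libvpx code):

    #include <cstdio>

    int main() {
      unsigned intermediate_height = 23;
      unsigned output_width = 16;
      // Unsigned arithmetic: 1 - 368 wraps to 4294966929 with 32-bit unsigned.
      unsigned wrapped = 1 - intermediate_height * output_width;
      // Signed arithmetic: the intended negative stride of -367.
      int stride = 1 - static_cast<int>(intermediate_height * output_width);
      std::printf("wrapped = %u, stride = %d\n", wrapped, stride);
      return 0;
    }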
diff --cc test/test.mk
index 28c0caae8,04acd9626..cdef53c75
--- a/test/test.mk
+++ b/test/test.mk
@@@ -42,7 -44,9 +43,8 @@@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) 
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc
 LIBVPX_TEST_SRCS-yes += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes += decode_test_driver.h
diff --cc test/variance_test.cc
index 8ac85118c,cb6339041..7eaed271e
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@@ -872,220 -847,135 +872,196 @@@ INSTANTIATE_TEST_CASE_P
      make_tuple(4, 4, &vpx_highbd_8_mse8x8_c)));
 */
 
+const VpxHBDVarianceTest::ParamType kArrayHBDVariance_c[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_12_variance128x128_c, 12),
+  make_tuple(7, 6, &vpx_highbd_12_variance128x64_c, 12),
+  make_tuple(6, 7, &vpx_highbd_12_variance64x128_c, 12),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12),
+  make_tuple(6, 5, &vpx_highbd_12_variance64x32_c, 12),
+  make_tuple(5, 6, &vpx_highbd_12_variance32x64_c, 12),
+  make_tuple(5, 5, &vpx_highbd_12_variance32x32_c, 12),
+  make_tuple(5, 4, &vpx_highbd_12_variance32x16_c, 12),
+  make_tuple(4, 5, &vpx_highbd_12_variance16x32_c, 12),
+  make_tuple(4, 4, &vpx_highbd_12_variance16x16_c, 12),
+  make_tuple(4, 3, &vpx_highbd_12_variance16x8_c, 12),
+  make_tuple(3, 4, &vpx_highbd_12_variance8x16_c, 12),
+  make_tuple(3, 3, &vpx_highbd_12_variance8x8_c, 12),
+  make_tuple(3, 2, &vpx_highbd_12_variance8x4_c, 12),
+  make_tuple(2, 3, &vpx_highbd_12_variance4x8_c, 12),
+  make_tuple(2, 2, &vpx_highbd_12_variance4x4_c, 12),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_10_variance128x128_c, 10),
+  make_tuple(7, 6, &vpx_highbd_10_variance128x64_c, 10),
+  make_tuple(6, 7, &vpx_highbd_10_variance64x128_c, 10),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_10_variance64x64_c, 10),
+  make_tuple(6, 5, &vpx_highbd_10_variance64x32_c, 10),
+  make_tuple(5, 6, &vpx_highbd_10_variance32x64_c, 10),
+  make_tuple(5, 5, &vpx_highbd_10_variance32x32_c, 10),
+  make_tuple(5, 4, &vpx_highbd_10_variance32x16_c, 10),
+  make_tuple(4, 5, &vpx_highbd_10_variance16x32_c, 10),
+  make_tuple(4, 4, &vpx_highbd_10_variance16x16_c, 10),
+  make_tuple(4, 3, &vpx_highbd_10_variance16x8_c, 10),
+  make_tuple(3, 4, &vpx_highbd_10_variance8x16_c, 10),
+  make_tuple(3, 3, &vpx_highbd_10_variance8x8_c, 10),
+  make_tuple(3, 2, &vpx_highbd_10_variance8x4_c, 10),
+  make_tuple(2, 3, &vpx_highbd_10_variance4x8_c, 10),
+  make_tuple(2, 2, &vpx_highbd_10_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_8_variance128x128_c, 8),
+  make_tuple(7, 6, &vpx_highbd_8_variance128x64_c, 8),
+  make_tuple(6, 7, &vpx_highbd_8_variance64x128_c, 8),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_8_variance64x64_c, 8),
+  make_tuple(6, 5, &vpx_highbd_8_variance64x32_c, 8),
+  make_tuple(5, 6, &vpx_highbd_8_variance32x64_c, 8),
+  make_tuple(5, 5, &vpx_highbd_8_variance32x32_c, 8),
+  make_tuple(5, 4, &vpx_highbd_8_variance32x16_c, 8),
+  make_tuple(4, 5, &vpx_highbd_8_variance16x32_c, 8),
+  make_tuple(4, 4, &vpx_highbd_8_variance16x16_c, 8),
+  make_tuple(4, 3, &vpx_highbd_8_variance16x8_c, 8),
+  make_tuple(3, 4, &vpx_highbd_8_variance8x16_c, 8),
+  make_tuple(3, 3, &vpx_highbd_8_variance8x8_c, 8),
+  make_tuple(3, 2, &vpx_highbd_8_variance8x4_c, 8),
+  make_tuple(2, 3, &vpx_highbd_8_variance4x8_c, 8),
+  make_tuple(2, 2, &vpx_highbd_8_variance4x4_c, 8)
+};
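Note on kArrayHBDVariance_c above and the ::testing::ValuesIn() instantiation just below: ::testing::Values(...) is generated with a fixed maximum argument count (historically 50 in googletest 1.x), so once the CONFIG_EXT_PARTITION entries push a parameter list past that limit, moving the list into a named array consumed by ::testing::ValuesIn() is the usual fix; it also lets individual entries be bracketed by #if blocks. A minimal sketch with a hypothetical test, not from this patch:

    #include <gtest/gtest.h>

    class SizeTest : public ::testing::TestWithParam<int> {};

    TEST_P(SizeTest, IsPositive) { EXPECT_GT(GetParam(), 0); }

    // ValuesIn iterates an array or container, so entries can be appended
    // (or compiled out with #if) without hitting the fixed arity of the
    // generated ::testing::Values(...) overloads.
    const int kSizes[] = { 4, 8, 16, 32, 64, 128 };
    INSTANTIATE_TEST_CASE_P(C, SizeTest, ::testing::ValuesIn(kSizes));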
 INSTANTIATE_TEST_CASE_P(
     C, VpxHBDVarianceTest,
-    ::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12),
-                      make_tuple(6, 5, &vpx_highbd_12_variance64x32_c, 12),
-                      make_tuple(5, 6, &vpx_highbd_12_variance32x64_c, 12),
-                      make_tuple(5, 5, &vpx_highbd_12_variance32x32_c, 12),
-                      make_tuple(5, 4, &vpx_highbd_12_variance32x16_c, 12),
-                      make_tuple(4, 5, &vpx_highbd_12_variance16x32_c, 12),
-                      make_tuple(4, 4, &vpx_highbd_12_variance16x16_c, 12),
-                      make_tuple(4, 3, &vpx_highbd_12_variance16x8_c, 12),
-                      make_tuple(3, 4, &vpx_highbd_12_variance8x16_c, 12),
-                      make_tuple(3, 3, &vpx_highbd_12_variance8x8_c, 12),
-                      make_tuple(3, 2, &vpx_highbd_12_variance8x4_c, 12),
-                      make_tuple(2, 3, &vpx_highbd_12_variance4x8_c, 12),
-                      make_tuple(2, 2, &vpx_highbd_12_variance4x4_c, 12),
-                      make_tuple(6, 6, &vpx_highbd_10_variance64x64_c, 10),
-                      make_tuple(6, 5, &vpx_highbd_10_variance64x32_c, 10),
-                      make_tuple(5, 6, &vpx_highbd_10_variance32x64_c, 10),
-                      make_tuple(5, 5, &vpx_highbd_10_variance32x32_c, 10),
-                      make_tuple(5, 4, &vpx_highbd_10_variance32x16_c, 10),
-                      make_tuple(4, 5, &vpx_highbd_10_variance16x32_c, 10),
-                      make_tuple(4, 4, &vpx_highbd_10_variance16x16_c, 10),
-                      make_tuple(4, 3, &vpx_highbd_10_variance16x8_c, 10),
-                      make_tuple(3, 4, &vpx_highbd_10_variance8x16_c, 10),
-                      make_tuple(3, 3, &vpx_highbd_10_variance8x8_c, 10),
-                      make_tuple(3, 2, &vpx_highbd_10_variance8x4_c, 10),
-                      make_tuple(2, 3, &vpx_highbd_10_variance4x8_c, 10),
-                      make_tuple(2, 2, &vpx_highbd_10_variance4x4_c, 10),
-                      make_tuple(6, 6, &vpx_highbd_8_variance64x64_c, 8),
-                      make_tuple(6, 5, &vpx_highbd_8_variance64x32_c, 8),
-                      make_tuple(5, 6, &vpx_highbd_8_variance32x64_c, 8),
-                      make_tuple(5, 5, &vpx_highbd_8_variance32x32_c, 8),
-                      make_tuple(5, 4, &vpx_highbd_8_variance32x16_c, 8),
-                      make_tuple(4, 5, &vpx_highbd_8_variance16x32_c, 8),
-                      make_tuple(4, 4, &vpx_highbd_8_variance16x16_c, 8),
-                      make_tuple(4, 3, &vpx_highbd_8_variance16x8_c, 8),
-                      make_tuple(3, 4, &vpx_highbd_8_variance8x16_c, 8),
-                      make_tuple(3, 3, &vpx_highbd_8_variance8x8_c, 8),
-                      make_tuple(3, 2, &vpx_highbd_8_variance8x4_c, 8),
-                      make_tuple(2, 3, &vpx_highbd_8_variance4x8_c, 8),
-                      make_tuple(2, 2, &vpx_highbd_8_variance4x4_c, 8)));
+    ::testing::ValuesIn(kArrayHBDVariance_c));
 
+#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    C, VpxHBDSubpelVarianceTest,
+    SSE4_1, VpxHBDVarianceTest,
     ::testing::Values(
-        make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_c, 8),
-        make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_c, 8),
-        make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_c, 8),
-        make_tuple(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_c, 8),
-        make_tuple(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_c, 8),
-        make_tuple(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_c, 8),
-        make_tuple(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_c, 8),
-        make_tuple(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_c, 8),
-        make_tuple(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_c, 8),
-        make_tuple(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_c, 8),
-        make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_c, 8),
-        make_tuple(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_c, 8),
-        make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_c, 8),
-        make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_c, 10),
-        make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_c, 10),
-        make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_c, 10),
-        make_tuple(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_c, 10),
-        make_tuple(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_c, 10),
-        make_tuple(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_c, 10),
-        make_tuple(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_c, 10),
-        make_tuple(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_c, 10),
-        make_tuple(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_c, 10),
-        make_tuple(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_c, 10),
-        make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_c, 10),
-        make_tuple(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_c, 10),
-        make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_c, 10),
-        make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_c, 12),
-        make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_c, 12),
-        make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_c, 12),
-        make_tuple(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_c, 12),
-        make_tuple(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_c, 12),
-        make_tuple(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_c, 12),
-        make_tuple(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_c, 12),
-        make_tuple(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_c, 12),
-        make_tuple(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_c, 12),
-        make_tuple(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_c, 12),
-        make_tuple(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_c, 12),
-        make_tuple(2, 3, &vpx_highbd_12_sub_pixel_variance4x8_c, 12),
-        make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_c, 12)));
- 
+        make_tuple(2, 2, &vpx_highbd_8_variance4x4_sse4_1, 8),
+        make_tuple(2, 2, &vpx_highbd_10_variance4x4_sse4_1, 10),
+        make_tuple(2, 2, &vpx_highbd_12_variance4x4_sse4_1, 12)));
+#endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+
+const VpxHBDSubpelVarianceTest::ParamType kArrayHBDSubpelVariance_c[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_8_sub_pixel_variance128x128_c, 8),
+  make_tuple(7, 6, &vpx_highbd_8_sub_pixel_variance128x64_c, 8),
+  make_tuple(6, 7, &vpx_highbd_8_sub_pixel_variance64x128_c, 8),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_c, 8),
+  make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_c, 8),
+  make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_c, 8),
+  make_tuple(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_c, 8),
+  make_tuple(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_c, 8),
+  make_tuple(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_c, 8),
+  make_tuple(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_c, 8),
+  make_tuple(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_c, 8),
+  make_tuple(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_c, 8),
+  make_tuple(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_c, 8),
+  make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_c, 8),
+  make_tuple(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_c, 8),
+  make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_10_sub_pixel_variance128x128_c, 10),
+  make_tuple(7, 6, &vpx_highbd_10_sub_pixel_variance128x64_c, 10),
+  make_tuple(6, 7, &vpx_highbd_10_sub_pixel_variance64x128_c, 10),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_c, 10),
+  make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_c, 10),
+  make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_c, 10),
+  make_tuple(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_c, 10),
+  make_tuple(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_c, 10),
+  make_tuple(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_c, 10),
+  make_tuple(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_c, 10),
+  make_tuple(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_c, 10),
+  make_tuple(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_c, 10),
+  make_tuple(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_c, 10),
+  make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_c, 10),
+  make_tuple(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_c, 10),
+  make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_12_sub_pixel_variance128x128_c, 12),
+  make_tuple(7, 6, &vpx_highbd_12_sub_pixel_variance128x64_c, 12),
+  make_tuple(6, 7, &vpx_highbd_12_sub_pixel_variance64x128_c, 12),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_c, 12),
+  make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_c, 12),
+  make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_c, 12),
+  make_tuple(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_c, 12),
+  make_tuple(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_c, 12),
+  make_tuple(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_c, 12),
+  make_tuple(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_c, 12),
+  make_tuple(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_c, 12),
+  make_tuple(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_c, 12),
+  make_tuple(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_c, 12),
+  make_tuple(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_c, 12),
+  make_tuple(2, 3, &vpx_highbd_12_sub_pixel_variance4x8_c, 12),
+  make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_c, 12)
+};
+INSTANTIATE_TEST_CASE_P(
+    C, VpxHBDSubpelVarianceTest,
+    ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
+
+const VpxHBDSubpelAvgVarianceTest::ParamType kArrayHBDSubpelAvgVariance_c[] = {
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_8_sub_pixel_avg_variance128x128_c, 8),
+  make_tuple(7, 6, &vpx_highbd_8_sub_pixel_avg_variance128x64_c, 8),
+  make_tuple(6, 7, &vpx_highbd_8_sub_pixel_avg_variance64x128_c, 8),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_c, 8),
+  make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_c, 8),
+  make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_c, 8),
+  make_tuple(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_c, 8),
+  make_tuple(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_c, 8),
+  make_tuple(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_c, 8),
+  make_tuple(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_c, 8),
+  make_tuple(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_c, 8),
+  make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_c, 8),
+  make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_c, 8),
+  make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_c, 8),
+  make_tuple(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_c, 8),
+  make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_c, 8),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_10_sub_pixel_avg_variance128x128_c, 10),
+  make_tuple(7, 6, &vpx_highbd_10_sub_pixel_avg_variance128x64_c, 10),
+  make_tuple(6, 7, &vpx_highbd_10_sub_pixel_avg_variance64x128_c, 10),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_c, 10),
+  make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_c, 10),
+  make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_c, 10),
+  make_tuple(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_c, 10),
+  make_tuple(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_c, 10),
+  make_tuple(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_c, 10),
+  make_tuple(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_c, 10),
+  make_tuple(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_c, 10),
+  make_tuple(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_c, 10),
+  make_tuple(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_c, 10),
+  make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_c, 10),
+  make_tuple(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_c, 10),
+  make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_c, 10),
+#if CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &vpx_highbd_12_sub_pixel_avg_variance128x128_c, 12),
+  make_tuple(7, 6, &vpx_highbd_12_sub_pixel_avg_variance128x64_c, 12),
+  make_tuple(6, 7, &vpx_highbd_12_sub_pixel_avg_variance64x128_c, 12),
+#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_c, 12),
+  make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_c, 12),
+  make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_c, 12),
+  make_tuple(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_c, 12),
+  make_tuple(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_c, 12),
+  make_tuple(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_c, 12),
+  make_tuple(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_c, 12),
+  make_tuple(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_c, 12),
+  make_tuple(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_c, 12),
+  make_tuple(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_c, 12),
+  make_tuple(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_c, 12),
+  make_tuple(2, 3, &vpx_highbd_12_sub_pixel_avg_variance4x8_c, 12),
+  make_tuple(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_c, 12)
+};
 INSTANTIATE_TEST_CASE_P(
     C, VpxHBDSubpelAvgVarianceTest,
-    ::testing::Values(
-        make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_c, 8),
-        make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_c, 8),
-        make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_c, 8),
-        make_tuple(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_c, 8),
-        make_tuple(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_c, 8),
-        make_tuple(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_c, 8),
-        make_tuple(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_c, 8),
-        make_tuple(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_c, 8),
-        make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_c, 8),
-        make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_c, 8),
-        make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_c, 8),
-        make_tuple(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_c, 8),
-        make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_c, 8),
-        make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_c, 10),
-        make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_c, 10),
-        make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_c, 10),
-        make_tuple(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_c, 10),
-        make_tuple(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_c, 10),
-        make_tuple(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_c, 10),
-        make_tuple(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_c, 10),
-        make_tuple(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_c, 10),
-        make_tuple(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_c, 10),
-        make_tuple(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_c, 10),
-        make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_c, 10),
-        make_tuple(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_c, 10),
-        make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_c, 10),
-        make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_c, 12),
-        make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_c, 12),
-        make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_c, 12),
-        make_tuple(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_c, 12),
-        make_tuple(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_c, 12),
-        make_tuple(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_c, 12),
-        make_tuple(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_c, 12),
-        make_tuple(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_c, 12),
-        make_tuple(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_c, 12),
-        make_tuple(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_c, 12),
-        make_tuple(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_c, 12),
-        make_tuple(2, 3, &vpx_highbd_12_sub_pixel_avg_variance4x8_c, 12),
-        make_tuple(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_c, 12)));
+    ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
- #if HAVE_MMX
- INSTANTIATE_TEST_CASE_P(MMX, VpxMseTest,
-                         ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_mmx)));
- 
- INSTANTIATE_TEST_CASE_P(MMX, SumOfSquaresTest,
-                         ::testing::Values(vpx_get_mb_ss_mmx));
- 
- INSTANTIATE_TEST_CASE_P(
-     MMX, VpxVarianceTest,
-     ::testing::Values(make_tuple(4, 4, &vpx_variance16x16_mmx, 0),
-                       make_tuple(4, 3, &vpx_variance16x8_mmx, 0),
-                       make_tuple(3, 4, &vpx_variance8x16_mmx, 0),
-                       make_tuple(3, 3, &vpx_variance8x8_mmx, 0),
-                       make_tuple(2, 2, &vpx_variance4x4_mmx, 0)));
- 
- INSTANTIATE_TEST_CASE_P(
-     MMX, VpxSubpelVarianceTest,
-     ::testing::Values(make_tuple(4, 4, &vpx_sub_pixel_variance16x16_mmx, 0),
-                       make_tuple(4, 3, &vpx_sub_pixel_variance16x8_mmx, 0),
-                       make_tuple(3, 4, &vpx_sub_pixel_variance8x16_mmx, 0),
-                       make_tuple(3, 3, &vpx_sub_pixel_variance8x8_mmx, 0),
-                       make_tuple(2, 2, &vpx_sub_pixel_variance4x4_mmx, 0)));
- #endif  // HAVE_MMX
- 
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
                         ::testing::Values(vpx_get_mb_ss_sse2));
@@@ -1143,26 -1033,10 +1119,26 @@@ INSTANTIATE_TEST_CASE_P
         make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_sse2, 0),
         make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_sse2, 0),
         make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0),
-        make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse, 0),
-        make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse, 0)));
+        make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse2, 0),
+        make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse2, 0)));
 #endif  // CONFIG_USE_X86INC
 
+#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, VpxSubpelVarianceTest,
+    ::testing::Values(
+        make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_sse4_1, 8),
+        make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_sse4_1, 10),
+        make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_sse4_1, 12)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, VpxSubpelAvgVarianceTest,
+    ::testing::Values(
+        make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_sse4_1, 8),
+        make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_sse4_1, 10),
+        make_tuple(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_sse4_1, 12)));
+#endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+
 #if CONFIG_VP9_HIGHBITDEPTH
 /* TODO(debargha): This test does not support the highbd version
 INSTANTIATE_TEST_CASE_P(
diff --cc vp10/common/vp10_rtcd_defs.pl
index 1b501e2bc,c8a10e57f..51b674b8d
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@@ -443,37 -397,22 +436,37 @@@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH
   add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_fht16x16 sse2/;
 
+  add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x32/;
+
   add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
+  specialize qw/vp10_fwht4x4/, "$sse2_x86inc";
 } else {
   add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
-  specialize qw/vp10_fht4x4 sse2 msa/;
+  specialize qw/vp10_fht4x4 sse2/;
 
   add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
-  specialize qw/vp10_fht8x8 sse2 msa/;
+  specialize qw/vp10_fht8x8 sse2/;
 
   add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
-  specialize qw/vp10_fht16x16 sse2 msa/;
+  specialize qw/vp10_fht16x16 sse2/;
+
+  if (vpx_config("CONFIG_EXT_TX") ne "yes") {
+    specialize qw/vp10_fht4x4 msa/;
+    specialize qw/vp10_fht8x8 msa/;
+    specialize qw/vp10_fht16x16 msa/;
+  }
+
+  add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x32/;
 
   add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
+  specialize qw/vp10_fwht4x4 msa/, "$sse2_x86inc";
 }
 
+add_proto qw/void vp10_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
+specialize qw/vp10_fwd_idtx/;
+
 # Inverse transform
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   # Note as optimized versions of these functions are added we need to add a check to ensure
diff --cc vp10/encoder/encoder.c
index d2757bf30,e7fff8299..5adba4ca9
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@@ -2844,8 -1905,15 +2846,9 @@@ void vp10_remove_compressor(VP10_COMP *
       SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
     }
 
-    fprintf(f, "%s\t Time\n", headings);
-    fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
- 
-    if (cpi->b_calculate_ssimg) {
-      SNPRINT(headings, "\t SSIMG\tWtSSIMG");
-      SNPRINT2(results, "\t%7.3f", cpi->ssimg.stat[ALL] / cpi->count);
-      SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);
-    }
- 
+    fprintf(f, "%s\t Time Rc-Err Abs Err\n", headings);
+    fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results,
+            total_encode_time, rate_err, fabs(rate_err));
   }
 
   fclose(f);
diff --cc vp10/vp10cx.mk
index 007fb4e2a,4f265b539..5d5c88ab0
--- a/vp10/vp10cx.mk
+++ b/vp10/vp10cx.mk
@@@ -114,12 -103,8 +114,12 @@@ VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x
 endif
 endif
 
- VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.c
+ VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
 VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
+VP10_CX_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_inv_txfm_sse4.c
+endif
 
 ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoiser_sse2.c
diff --cc vp8/encoder/onyx_if.c
index 0efdac428,d5a0fff35..8511af29c
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@@ -2292,14 -2303,55 +2293,16 @@@ void vp8_remove_compressor(VP8_COMP **p
                             cpi->summed_weights, 8.0);
 
             fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t"
-                       "GLPsnrP\tVPXSSIM\t Time(us)\n");
+                       "GLPsnrP\tVPXSSIM\t Time(us) Rc-Err "
+                       "Abs Err\n");
             fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
-                       "%7.3f\t%8.0f\n",
+                       "%7.3f\t%8.0f %7.2f %7.2f\n",
                        dr, cpi->total / cpi->count, total_psnr,
                        cpi->totalp / cpi->count, total_psnr2,
-                       total_ssim, total_encode_time);
+                       total_ssim, total_encode_time,
+                       rate_err, fabs(rate_err));
         }
     }
- 
-     if (cpi->b_calculate_ssimg)
-     {
-         if (cpi->oxcf.number_of_layers > 1)
-         {
-             int i;
- 
-             fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"
-                        "Time(us)\n");
-             for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)
-             {
-                 double dr = (double)cpi->bytes_in_layer[i] *
-                             8.0 / 1000.0 / time_encoded;
-                 fprintf(f, "%5d\t%7.3f\t%6.4f\t"
-                            "%6.4f\t%6.4f\t%6.4f\t%8.0f\n",
-                            i, dr,
-                            cpi->total_ssimg_y_in_layer[i] /
-                                 cpi->frames_in_layer[i],
-                            cpi->total_ssimg_u_in_layer[i] /
-                                 cpi->frames_in_layer[i],
-                            cpi->total_ssimg_v_in_layer[i] /
-                                 cpi->frames_in_layer[i],
-                            cpi->total_ssimg_all_in_layer[i] /
-                                 cpi->frames_in_layer[i],
-                            total_encode_time);
-             }
-         }
-         else
-         {
-             fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"
-                        "Time(us)\n");
-             fprintf(f, "%7.3f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr,
-                        cpi->total_ssimg_y / cpi->count,
-                        cpi->total_ssimg_u / cpi->count,
-                        cpi->total_ssimg_v / cpi->count,
-                        cpi->total_ssimg_all / cpi->count, total_encode_time);
-         }
-     }
- 
     fclose(f);
 
 #if 0
     f = fopen("qskip.stt", "a");
diff --cc vp9/encoder/vp9_aq_cyclicrefresh.c
index b27ce6a03,3e1a0a522..d8920fbd5
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@@ -20,9 -20,9 +20,8 @@@
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_segmentation.h"
 
- 
 CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
   size_t last_coded_q_map_size;
-  size_t consec_zero_mv_size;
   CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
   if (cr == NULL) return NULL;
diff --cc vp9/encoder/vp9_encoder.c
index 845f5aa68,3f88d9c99..fde1cb9cc
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@@ -1737,7 -1800,11 +1800,10 @@@ VP9_COMP *vp9_create_compressor(VP9Enco
   cpi->multi_arf_last_grp_enabled = 0;
 
   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
+
+  init_level_info(&cpi->level_info);
+
 #if CONFIG_INTERNAL_STATS
-  cpi->b_calculate_ssimg = 0;
   cpi->b_calculate_blockiness = 1;
   cpi->b_calculate_consistency = 1;
   cpi->total_inconsistency = 0;
@@@ -2053,8 -2126,16 +2121,9 @@@ void vp9_remove_compressor(VP9_COMP *cp
       SNPRINT2(results, "\t%7.3f", consistency);
       SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
     }
 
-    fprintf(f, "%s\t Time\n", headings);
-    fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
-
-    if (cpi->b_calculate_ssimg) {
-      SNPRINT(headings, "\t SSIMG\tWtSSIMG");
-      SNPRINT2(results, "\t%7.3f", cpi->ssimg.stat[ALL] / cpi->count);
-      SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);
-    }
-
+    fprintf(f, "%s\t Time Rc-Err Abs Err\n", headings);
+    fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results,
+            total_encode_time, rate_err, fabs(rate_err));
   }
 
   fclose(f);
diff --cc vpx/vp8cx.h
index 710fb51e2,2752a8627..dcc35c906
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@@ -570,14 -555,20 +570,29 @@@ enum vp8e_enc_control_id
    */
   VP9E_SET_RENDER_SIZE,
 
+  /*!\brief Codec control function to set target level.
+   *
+   * 255: off (default); 0: only keep level stats; 10: target for level 1.0;
+   * 11: target for level 1.1; ... 62: target for level 6.2
+   *
+   * Supported in codecs: VP9
+   */
+  VP9E_SET_TARGET_LEVEL,
+
+  /*!\brief Codec control function to get bitstream level.
+   *
+   * Supported in codecs: VP9
+   */
-  VP9E_GET_LEVEL
++  VP9E_GET_LEVEL,
++
+  /*!\brief Codec control function to set intended superblock size.
+   *
+   * By default, the superblock size is determined separately for each
+   * frame by the encoder.
+   *
+   * Supported in codecs: VP10
+   */
+  VP10E_SET_SUPERBLOCK_SIZE,
 };
 
 /*!\brief vpx 1-D scaling mode
@@@ -838,15 -821,15 +853,21 @@@ VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_RANGE
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *)
 #define VPX_CTRL_VP9E_SET_SVC_REF_FRAME_CONFIG
 
-VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
+/*!\brief
+ *
+ * TODO(rbultje) : add support of the control in ffmpeg
+ */
 #define VPX_CTRL_VP9E_SET_RENDER_SIZE
+VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
+
+VPX_CTRL_USE_TYPE(VP10E_SET_SUPERBLOCK_SIZE, unsigned int)
+#define VPX_CTRL_VP10E_SET_SUPERBLOCK_SIZE
+
+ VPX_CTRL_USE_TYPE(VP9E_SET_TARGET_LEVEL, unsigned int)
+ #define VPX_CTRL_VP9E_SET_TARGET_LEVEL
+
+ VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *)
+ #define VPX_CTRL_VP9E_GET_LEVEL
+
- /*!\endcond */
 /*! @} - end defgroup vp8_encoder */
 #ifdef __cplusplus
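Note on the vpx/vp8cx.h hunks above: VPX_CTRL_USE_TYPE() declares the typed vpx_codec_control() wrapper for each new control, so VP9E_SET_TARGET_LEVEL takes an unsigned int and VP9E_GET_LEVEL takes an int *. A minimal usage sketch (assumes enc is a vpx_codec_ctx_t already initialized for VP9 encoding with vpx_codec_enc_init()):

    #include "vpx/vp8cx.h"
    #include "vpx/vpx_encoder.h"

    void set_and_query_level(vpx_codec_ctx_t *enc) {
      // Level 1.0 is encoded as 10 and 6.2 as 62; 255 disables level
      // targeting and 0 only keeps level statistics (see the enum above).
      vpx_codec_control(enc, VP9E_SET_TARGET_LEVEL, 10u);

      int level = 0;  // Receives the level of the bitstream encoded so far.
      vpx_codec_control(enc, VP9E_GET_LEVEL, &level);
    }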
diff --cc vpx_dsp/vpx_dsp_rtcd_defs.pl
index 7526beabd,a62acb717..a04a6849d
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@@ -987,49 -961,66 +987,85 @@@ if (vpx_config("CONFIG_ENCODERS") eq "y
 add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
 specialize qw/vpx_subtract_block neon msa/, "$sse2_x86inc";
 
-#
-# Single block SAD
-#
-add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";
+if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
+  #
+  # Sum of Squares
+  #
+  add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
+  specialize qw/vpx_sum_squares_2d_i16 sse2/;
 
-add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";
+  add_proto qw/uint64_t vpx_sum_squares_i16/, "const int16_t *src, uint32_t N";
+  specialize qw/vpx_sum_squares_i16 sse2/;
+}
 
+ add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad16x16 media neon msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad16x8 neon msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad8x16 neon msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad8x8 neon msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
+ 
+ add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_sad4x4 neon msa/, "$sse2_x86inc";
+ 
+ #
+ # Avg
+ #
 if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
+  #
+  # Avg
+  #
   add_proto qw/unsigned int vpx_avg_8x8/, "const uint8_t *, int p";
   specialize qw/vpx_avg_8x8 sse2 neon msa/;
- 
   add_proto qw/unsigned int vpx_avg_4x4/, "const uint8_t *, int p";
   specialize qw/vpx_avg_4x4 sse2 neon msa/;
+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
+    specialize qw/vpx_highbd_avg_8x8/;
+    add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
+    specialize qw/vpx_highbd_avg_4x4/;
+    add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
+    specialize qw/vpx_highbd_subtract_block sse2/;
+  }
 
+  #
+  # Minmax
+  #
   add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-  specialize qw/vpx_minmax_8x8 sse2/;
+  specialize qw/vpx_minmax_8x8 sse2 neon/;
+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+    specialize qw/vpx_highbd_minmax_8x8/;
+  }
 
   add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
-  specialize qw/vpx_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
+  specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64_x86inc";
 
   add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
-  specialize qw/vpx_hadamard_16x16 sse2/;
+  specialize qw/vpx_hadamard_16x16 sse2 neon/;
 
   add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";
   specialize qw/vpx_satd sse2 neon/;
 
@@@ -1044,98 -1035,44 +1080,98 @@@
   specialize qw/vpx_vector_var neon sse2/;
 }  # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
 
-add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc";
- 
-add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad64x32_avg avx2 msa/, "$sse2_x86inc";
- 
-add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x64_avg avx2 msa/, "$sse2_x86inc";
- 
-add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x32_avg avx2 msa/, "$sse2_x86inc";
- 
-add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x16_avg avx2 msa/, "$sse2_x86inc";
- 
-add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";
- 
-add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc";
+#
+# Single block SAD / Single block Avg SAD
+#
+foreach (@block_sizes) {
+  ($w, $h) = @$_;
+  add_proto qw/unsigned int/, "vpx_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+  add_proto qw/unsigned int/, "vpx_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+}
 
-add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad128x128 /, "$sse2_x86inc";
+specialize qw/vpx_sad128x64  /, "$sse2_x86inc";
+specialize qw/vpx_sad64x128  /, "$sse2_x86inc";
+specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
- specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc";
- specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc";
- specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc";
- specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc";
++specialize qw/vpx_sad16x16 media neon msa/, "$sse2_x86inc";
++specialize qw/vpx_sad16x8 neon msa/, "$sse2_x86inc";
++specialize qw/vpx_sad8x16 neon msa/, "$sse2_x86inc";
++specialize qw/vpx_sad8x8 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
- specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc";
++specialize qw/vpx_sad4x4 neon msa/, "$sse2_x86inc";
+
+specialize qw/vpx_sad128x128_avg/, "$sse2_x86inc";
+specialize qw/vpx_sad128x64_avg /, "$sse2_x86inc";
+specialize qw/vpx_sad64x128_avg /, "$sse2_x86inc";
+specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  foreach (@block_sizes) {
+    ($w, $h) = @$_;
+    add_proto qw/unsigned int/, "vpx_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+    add_proto qw/unsigned int/, "vpx_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+    if ($w != 128 && $h != 128 && $w != 4) {
+      specialize "vpx_highbd_sad${w}x${h}", "$sse2_x86inc";
+      specialize "vpx_highbd_sad${w}x${h}_avg", "$sse2_x86inc";
+    }
+  }
+}
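Note on the vpx_dsp_rtcd_defs.pl refactor above: instead of one hand-written add_proto/specialize pair per block size, the SAD prototypes are now generated by looping over @block_sizes and interpolating ${w}x${h} into the function names, with the per-size specialize lines kept separate. For (64, 64), one loop iteration corresponds roughly to these C prototypes (the _c suffix denotes the reference implementation; the RTCD machinery points the unsuffixed name at whichever variant specialize enabled):

    #include <stdint.h>

    unsigned int vpx_sad64x64_c(const uint8_t *src_ptr, int src_stride,
                                const uint8_t *ref_ptr, int ref_stride);
    unsigned int vpx_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride,
                                    const uint8_t *ref_ptr, int ref_stride,
                                    const uint8_t *second_pred);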
src_stride, const uint8_t *ref_ptr, int ref_stride"; + add_proto qw/unsigned int/, "vpx_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; + if ($w != 128 && $h != 128 && $w != 4) { + specialize "vpx_highbd_sad${w}x${h}", "$sse2_x86inc"; + specialize "vpx_highbd_sad${w}x${h}_avg", "$sse2_x86inc"; + } + } +} -add_proto qw/unsigned int vpx_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc"; +# +# Masked SAD +# +if (vpx_config("CONFIG_EXT_INTER") eq "yes") { + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_masked_sad${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize "vpx_masked_sad${w}x${h}", qw/ssse3/; + } -add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc"; + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_highbd_masked_sad${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize "vpx_highbd_masked_sad${w}x${h}", qw/ssse3/; + } + } +} -add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc"; +# +# OBMC SAD +# +if (vpx_config("CONFIG_OBMC") eq "yes") { + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask"; + specialize "vpx_obmc_sad${w}x${h}", qw/sse4_1/; + } -add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc"; + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask"; + specialize "vpx_highbd_obmc_sad${w}x${h}", qw/sse4_1/; + } + } +} # # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally @@@ -1251,205 -1431,489 +1329,609 @@@ add_proto qw/unsigned int vpx_variance4 # Specialty Variance # add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get16x16var sse2 avx2 neon msa/; + add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get8x8var sse2 neon msa/; - specialize qw/vpx_get16x16var avx2 sse2 neon msa/; - specialize qw/vpx_get8x8var mmx sse2 neon msa/; -add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse16x16 sse2 avx2 media neon msa/; ++specialize qw/vpx_get16x16var sse2 avx2 neon msa/; ++specialize qw/vpx_get8x8var sse2 neon msa/; + -add_proto qw/unsigned int 
vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse16x8 sse2 msa/; +add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse8x16 sse2 msa/; - add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse8x8 sse2 msa/; - specialize qw/vpx_mse16x16 mmx avx2 sse2 media neon msa/; - specialize qw/vpx_mse16x8 sse2 msa/; - specialize qw/vpx_mse8x16 sse2 msa/; - specialize qw/vpx_mse8x8 sse2 msa/; -add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; - specialize qw/vpx_get_mb_ss sse2 msa/; ++specialize qw/vpx_mse16x16 sse2 avx2 media neon msa/; ++specialize qw/vpx_mse16x8 sse2 msa/; ++specialize qw/vpx_mse8x16 sse2 msa/; ++specialize qw/vpx_mse8x8 sse2 msa/; -add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; - specialize qw/vpx_get4x4sse_cs neon msa/; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + foreach $bd (8, 10, 12) { + add_proto qw/void/, "vpx_highbd_${bd}_get16x16var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + add_proto qw/void/, "vpx_highbd_${bd}_get8x8var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; -add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride"; + add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + + specialize "vpx_highbd_${bd}_mse16x16", qw/sse2/; + specialize "vpx_highbd_${bd}_mse8x8", qw/sse2/; + } +} # -# Subpixel Variance +# ... 
# -add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; - -add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride"; +specialize qw/vpx_upsampled_pred sse2/; +add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride"; +specialize qw/vpx_comp_avg_upsampled_pred sse2/; -add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc"; - -add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vpx_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, const uint8_t *ref8, int ref_stride"; + specialize qw/vpx_highbd_upsampled_pred sse2/; + add_proto qw/void vpx_highbd_comp_avg_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride"; + specialize qw/vpx_highbd_comp_avg_upsampled_pred sse2/; +} -add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +# +# ... 
+# +add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; +add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; - specialize qw/vpx_get_mb_ss mmx sse2 msa/; -add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_get_mb_ss sse2 msa/; +specialize qw/vpx_get4x4sse_cs neon msa/; -add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance16x16 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; +# +# Variance / Subpixel Variance / Subpixel Avg Variance +# +foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/uint32_t/, "vpx_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t/, "vpx_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; +} -add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_variance64x64 sse2 avx2 neon msa/; +specialize qw/vpx_variance64x32 sse2 avx2 neon msa/; +specialize qw/vpx_variance32x64 sse2 neon msa/; +specialize qw/vpx_variance32x32 sse2 avx2 neon msa/; +specialize qw/vpx_variance32x16 sse2 avx2 msa/; +specialize qw/vpx_variance16x32 sse2 msa/; - specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/; - specialize qw/vpx_variance16x8 mmx sse2 neon msa/; - specialize qw/vpx_variance8x16 mmx sse2 neon msa/; - specialize qw/vpx_variance8x8 mmx sse2 media neon msa/; ++specialize qw/vpx_variance16x16 sse2 avx2 media neon msa/; ++specialize qw/vpx_variance16x8 sse2 neon msa/; ++specialize qw/vpx_variance8x16 sse2 neon msa/; ++specialize qw/vpx_variance8x8 sse2 media neon msa/; +specialize qw/vpx_variance8x4 sse2 msa/; +specialize qw/vpx_variance4x8 sse2 msa/; - specialize qw/vpx_variance4x4 mmx sse2 msa/; ++specialize qw/vpx_variance4x4 sse2 msa/; + +specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; - specialize qw/vpx_sub_pixel_variance16x16 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; - specialize qw/vpx_sub_pixel_variance16x8 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc"; - specialize qw/vpx_sub_pixel_variance8x16 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc"; - specialize qw/vpx_sub_pixel_variance8x8 
mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_sub_pixel_variance16x16 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_sub_pixel_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_sub_pixel_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_sub_pixel_variance8x8 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; - specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc"; - specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_sub_pixel_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + +specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; - specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc"; - specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; ++specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; -add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + foreach $bd (8, 10, 12) { + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t/, "vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t/, "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + if ($w != 128 && $h != 128 && $w != 4 && $h != 4) { + specialize "vpx_highbd_${bd}_variance${w}x${h}", "sse2"; + } + if ($w == 4 && $h == 4) { + specialize "vpx_highbd_${bd}_variance${w}x${h}", "sse4_1"; + } + if ($w != 128 && $h != 128 && $w != 4) { + specialize "vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", $sse2_x86inc; + specialize "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", $sse2_x86inc; + } + if ($w == 4 && $h == 4) { + specialize 
"vpx_highbd_${bd}_sub_pixel_variance${w}x${h}", "sse4_1"; + specialize "vpx_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "sse4_1"; + } + } + } +} # CONFIG_VP9_HIGHBITDEPTH -add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance8x8 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; +if (vpx_config("CONFIG_EXT_INTER") eq "yes") { +# +# Masked Variance / Masked Subpixel Variance +# + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_masked_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize "vpx_masked_variance${w}x${h}", qw/ssse3/; + specialize "vpx_masked_sub_pixel_variance${w}x${h}", qw/ssse3/; + } -add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + foreach $bd ("_", "_10_", "_12_") { + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_highbd${bd}masked_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize "vpx_highbd${bd}masked_variance${w}x${h}", qw/ssse3/; + specialize "vpx_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3/; + } + } + } +} -add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; +# +# OBMC Variance / OBMC Subpixel Variance +# +if (vpx_config("CONFIG_OBMC") eq "yes") { + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse"; + specialize "vpx_obmc_variance${w}x${h}", q/sse4_1/; + specialize "vpx_obmc_sub_pixel_variance${w}x${h}"; + } -add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + foreach $bd ("_", "_10_", "_12_") { + foreach (@block_sizes) { + ($w, $h) = @$_; + add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t 
-add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+#
+# OBMC Variance / OBMC Subpixel Variance
+#
+if (vpx_config("CONFIG_OBMC") eq "yes") {
+  foreach (@block_sizes) {
+    ($w, $h) = @$_;
+    add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
+    add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
+    specialize "vpx_obmc_variance${w}x${h}", q/sse4_1/;
+    specialize "vpx_obmc_sub_pixel_variance${w}x${h}";
+  }
-add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+    foreach $bd ("_", "_10_", "_12_") {
+      foreach (@block_sizes) {
+        ($w, $h) = @$_;
+        add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
+        add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
+        specialize "vpx_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1/;
+        specialize "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}";
+      }
+    }
+  }
+}
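The bare specialize call for vpx_obmc_sub_pixel_variance lists no ISAs, so the generated dispatcher is left pointing at the C implementation, while the non-subpel OBMC variance gets an SSE4.1 entry. A sketch of the contrast, again with a hypothetical stand-in specialize():

    sub specialize {
      my ($name, @isa) = @_;
      print @isa ? "$name: @isa (plus C fallback)\n" : "$name: C only\n";
    }
    specialize("vpx_obmc_variance32x32", qw/sse4_1/);
    specialize("vpx_obmc_sub_pixel_variance32x32");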
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
+ add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+
  #
  # Specialty Subpixel
  #
- add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/;
+ add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_variance_halfpixvar16x16_h sse2 media/;
- add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/;
+ add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_variance_halfpixvar16x16_v sse2 media/;
- add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/;
+ add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_variance_halfpixvar16x16_hv sse2 media/;
+#
+# Comp Avg
+#
+add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance64x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance64x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance32x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance32x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance32x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance16x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance16x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance8x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+
+ add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance64x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance64x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance32x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance32x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance32x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance16x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance16x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance8x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+
+ add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance64x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance64x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance32x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance32x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance32x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance16x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance16x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance8x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+
+ add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+
+ add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+
+ add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+
+ add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_mse16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_mse8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_mse16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_mse8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_mse16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_mse8x8 sse2/;
+ add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
+
+ #
+ # Subpixel Variance
+ #
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance64x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance64x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance32x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance32x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance32x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance16x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance16x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance16x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance8x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance8x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_12_sub_pixel_variance8x4/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance64x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance64x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance32x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance32x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance32x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance16x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance16x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance16x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance8x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance8x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_10_sub_pixel_variance8x4/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance64x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance64x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance32x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance32x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance32x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance16x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance16x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance16x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance8x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance8x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/vpx_highbd_8_sub_pixel_variance8x4/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+
+ } # CONFIG_VP9_HIGHBITDEPTH
+
+ #
+ # Post Processing
+ #
+ if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
+   add_proto qw/void vpx_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
+   specialize qw/vpx_plane_add_noise sse2 msa/;
  }
} # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC