From 59de0c0bc70ea06c017333c518a8e9ae1f183f95 Mon Sep 17 00:00:00 2001 From: Debargha Mukherjee Date: Thu, 12 Nov 2015 17:14:32 -0800 Subject: [PATCH] Adding encoder support for 128x128 coding units Changes to allow the encoder to make use of 128x128 coding units. Change-Id: I340bd38f9d9750cb6346d83885efb00443852910 --- test/vp9_subtract_test.cc | 2 +- vp9/common/vp9_rtcd_defs.pl | 216 +++++++++++++++++++++++++++- vp9/encoder/vp9_context_tree.c | 13 ++ vp9/encoder/vp9_encodeframe.c | 94 ++++++++++-- vp9/encoder/vp9_encoder.c | 124 ++++++++++++++++ vp9/encoder/vp9_rd.c | 5 +- vp9/encoder/vp9_rdopt.c | 2 +- vp9/encoder/vp9_sad.c | 32 +++++ vp9/encoder/vp9_variance.c | 28 ++++ vp9/encoder/x86/vp9_denoiser_sse2.c | 7 + 10 files changed, 510 insertions(+), 13 deletions(-) diff --git a/test/vp9_subtract_test.cc b/test/vp9_subtract_test.cc index fabb43824..92dc476b5 100644 --- a/test/vp9_subtract_test.cc +++ b/test/vp9_subtract_test.cc @@ -91,7 +91,7 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) { INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest, ::testing::Values(vp9_subtract_block_c)); -#if HAVE_SSE2 && CONFIG_USE_X86INC +#if HAVE_SSE2 && CONFIG_USE_X86INC && !CONFIG_EXT_CODING_UNIT_SIZE INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest, ::testing::Values(vp9_subtract_block_sse2)); #endif diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 27dfb36bb..e7e4e9a80 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1058,6 +1058,17 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { # variance +if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { +add_proto qw/unsigned int vp9_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance128x128/; + +add_proto qw/unsigned int vp9_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance128x64/; + +add_proto qw/unsigned int vp9_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance64x128/; +} + add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance32x16 avx2/, "$sse2_x86inc"; @@ -1103,6 +1114,26 @@ specialize qw/vp9_variance4x8/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance4x4/, "$sse2_x86inc"; +if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { +add_proto qw/unsigned int vp9_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance128x128/; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance128x128/; + +add_proto qw/unsigned int vp9_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance128x64/; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance128x64/; + +add_proto qw/unsigned int vp9_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance64x128/; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance64x128/; +} + add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -1183,6 +1214,17 @@ specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; +if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { +add_proto qw/unsigned int vp9_sad128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vp9_sad128x128/; + +add_proto qw/unsigned int vp9_sad128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vp9_sad128x64/; + +add_proto qw/unsigned int vp9_sad64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vp9_sad64x128/; +} + add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad64x64 neon avx2/, "$sse2_x86inc"; @@ -1222,6 +1264,17 @@ specialize qw/vp9_sad4x8/, "$sse_x86inc"; add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad4x4/, "$sse_x86inc"; +if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { +add_proto qw/unsigned int vp9_sad128x128_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; +specialize qw/vp9_sad128x128_avg/; + +add_proto qw/unsigned int vp9_sad128x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; +specialize qw/vp9_sad128x64_avg/; + +add_proto qw/unsigned int vp9_sad64x128_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; +specialize qw/vp9_sad64x128_avg/; +} + add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad64x64_avg avx2/, "$sse2_x86inc"; @@ -1261,6 +1314,11 @@ specialize qw/vp9_sad4x8_avg/, "$sse_x86inc"; add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad4x4_avg/, "$sse_x86inc"; +if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { +add_proto qw/void vp9_sad128x128x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad128x128x3/; +} + add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad64x64x3/; @@ -1282,6 +1340,11 @@ specialize qw/vp9_sad8x8x3 sse3/; add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad4x4x3 sse3/; +if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { +add_proto qw/void vp9_sad128x128x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad128x128x8/; +} + add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; specialize qw/vp9_sad64x64x8/; @@ -1309,6 +1372,17 @@ specialize qw/vp9_sad4x8x8/; add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; specialize qw/vp9_sad4x4x8 sse4/; +if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { +add_proto qw/void vp9_sad128x128x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad128x128x4d/; + +add_proto qw/void vp9_sad128x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad128x64x4d/; + +add_proto qw/void vp9_sad64x128x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x128x4d/; +} + add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad64x64x4d sse2 avx2/; @@ -1492,9 +1566,14 @@ if (vpx_config("CONFIG_WEDGE_PARTITION") eq "yes") { } # ENCODEMB INVOKE +if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; -specialize qw/vp9_subtract_block neon/, "$sse2_x86inc"; +specialize qw/vp9_subtract_block/; +}else{ +add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; +specialize qw/vp9_subtract_block neon/, "$sse2_x86inc"; +} # # Denoiser # @@ -1745,6 +1824,16 @@ specialize qw/vp9_temporal_filter_apply sse2/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # variance + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_variance64x128/; + } add_proto qw/unsigned int vp9_highbd_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_highbd_variance32x16/, "$sse2_x86inc"; @@ -1790,6 +1879,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; specialize qw/vp9_highbd_get16x16var/, "$sse2_x86inc"; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_10_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_10_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_10_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_variance64x128/; + } + add_proto qw/unsigned int vp9_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_highbd_10_variance32x16/, "$sse2_x86inc"; @@ -1835,6 +1935,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; specialize qw/vp9_highbd_10_get16x16var/, "$sse2_x86inc"; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_12_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_12_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_12_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_variance64x128/; + } + add_proto qw/unsigned int vp9_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_highbd_12_variance32x16/, "$sse2_x86inc"; @@ -1880,6 +1991,26 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; specialize qw/vp9_highbd_12_get16x16var/, "$sse2_x86inc"; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_sub_pixel_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_sub_pixel_avg_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_sub_pixel_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_sub_pixel_avg_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_sub_pixel_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_sub_pixel_avg_variance64x128/; + } + add_proto qw/unsigned int vp9_highbd_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_highbd_sub_pixel_variance64x64/, "$sse2_x86inc"; @@ -1958,6 +2089,26 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_highbd_sub_pixel_avg_variance4x4/; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_sub_pixel_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_10_sub_pixel_avg_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_sub_pixel_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_10_sub_pixel_avg_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_sub_pixel_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_10_sub_pixel_avg_variance64x128/; + } + add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_highbd_10_sub_pixel_variance64x64/, "$sse2_x86inc"; @@ -2036,6 +2187,26 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_highbd_10_sub_pixel_avg_variance4x4/; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_sub_pixel_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_12_sub_pixel_avg_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_sub_pixel_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_12_sub_pixel_avg_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_sub_pixel_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; + specialize qw/vp9_highbd_12_sub_pixel_avg_variance64x128/; + } + add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_highbd_12_sub_pixel_variance64x64/, "$sse2_x86inc"; @@ -2114,6 +2285,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_highbd_12_sub_pixel_avg_variance4x4/; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_sad128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vp9_highbd_sad128x128/; + + add_proto qw/unsigned int vp9_highbd_sad128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vp9_highbd_sad128x64/; + + add_proto qw/unsigned int vp9_highbd_sad64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vp9_highbd_sad64x128/; + } + add_proto qw/unsigned int vp9_highbd_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_highbd_sad64x64/, "$sse2_x86inc"; @@ -2153,6 +2335,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_highbd_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_highbd_sad4x4/; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_sad128x128_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; + specialize qw/vp9_highbd_sad128x128_avg/; + + add_proto qw/unsigned int vp9_highbd_sad128x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; + specialize qw/vp9_highbd_sad128x64_avg/; + + add_proto qw/unsigned int vp9_highbd_sad64x128_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; + specialize qw/vp9_highbd_sad64x128_avg/; + } + add_proto qw/unsigned int vp9_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_highbd_sad64x64_avg/, "$sse2_x86inc"; @@ -2192,6 +2385,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_highbd_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_highbd_sad4x4_avg/; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/void vp9_highbd_sad128x128x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; + specialize qw/vp9_highbd_sad128x128x3/; + } + add_proto qw/void vp9_highbd_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; specialize qw/vp9_highbd_sad64x64x3/; @@ -2213,6 +2411,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; specialize qw/vp9_highbd_sad4x4x3/; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/void vp9_highbd_sad128x128x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; + specialize qw/vp9_highbd_sad128x128x8/; + } + add_proto qw/void vp9_highbd_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; specialize qw/vp9_highbd_sad64x64x8/; @@ -2240,6 +2443,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; specialize qw/vp9_highbd_sad4x4x8/; + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/void vp9_highbd_sad128x128x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; + specialize qw/vp9_highbd_sad128x128x4d/; + + add_proto qw/void vp9_highbd_sad128x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; + specialize qw/vp9_highbd_sad128x64x4d/; + + add_proto qw/void vp9_highbd_sad64x128x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; + specialize qw/vp9_highbd_sad64x128x4d/; + } + add_proto qw/void vp9_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_highbd_sad64x64x4d sse2/; diff --git a/vp9/encoder/vp9_context_tree.c b/vp9/encoder/vp9_context_tree.c index de8c2c2dd..ea76d63f1 100644 --- a/vp9/encoder/vp9_context_tree.c +++ b/vp9/encoder/vp9_context_tree.c @@ -16,6 +16,9 @@ static const BLOCK_SIZE square[] = { BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, +#if CONFIG_EXT_CODING_UNIT_SIZE + BLOCK_128X128, +#endif }; static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, @@ -144,8 +147,13 @@ static void free_tree_contexts(PC_TREE *tree) { // represents the state of our search. void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { int i, j; +#if CONFIG_EXT_CODING_UNIT_SIZE + const int leaf_nodes = 64 * 4; + const int tree_nodes = 64 * 4 + 64 + 16 + 4 + 1; +#else const int leaf_nodes = 64; const int tree_nodes = 64 + 16 + 4 + 1; +#endif int pc_tree_index = 0; PC_TREE *this_pc; PICK_MODE_CONTEXT *this_leaf; @@ -199,8 +207,13 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { } void vp9_free_pc_tree(VP9_COMP *cpi) { +#if CONFIG_EXT_CODING_UNIT_SIZE + const int tree_nodes = 64 * 4 + 64 + 16 + 4 + 1; + const int leaf_nodes = 64 * 4; +#else const int tree_nodes = 64 + 16 + 4 + 1; const int leaf_nodes = 64; +#endif int i; // Set up all 4x4 mode contexts diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 989099219..37cd81feb 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -114,6 +114,16 @@ static const uint8_t VP9_VAR_OFFS[CODING_UNIT_SIZE] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, +#if CONFIG_EXT_CODING_UNIT_SIZE + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, +#endif }; #if CONFIG_VP9_HIGHBITDEPTH @@ -126,6 +136,16 @@ static const uint16_t VP9_HIGH_VAR_OFFS_8[CODING_UNIT_SIZE] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, +#if CONFIG_EXT_CODING_UNIT_SIZE + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128 +#endif }; static const uint16_t VP9_HIGH_VAR_OFFS_10[CODING_UNIT_SIZE] = { @@ -137,6 +157,16 @@ static const uint16_t VP9_HIGH_VAR_OFFS_10[CODING_UNIT_SIZE] = { 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, +#if CONFIG_EXT_CODING_UNIT_SIZE + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4 +#endif }; static const uint16_t VP9_HIGH_VAR_OFFS_12[CODING_UNIT_SIZE] = { @@ -148,6 +178,16 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[CODING_UNIT_SIZE] = { 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, +#if CONFIG_EXT_CODING_UNIT_SIZE + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16 +#endif }; #endif // CONFIG_VP9_HIGHBITDEPTH @@ -432,6 +472,13 @@ typedef struct { v32x32 split[4]; } v64x64; +#if CONFIG_EXT_CODING_UNIT_SIZE +typedef struct { + partition_variance part_variances; + v64x64 split[4]; +} v128x128; +#endif + typedef struct { partition_variance *part_variances; var *split[4]; @@ -441,6 +488,9 @@ typedef enum { V16X16, V32X32, V64X64, +#if CONFIG_EXT_CODING_UNIT_SIZE + V128X128, +#endif } TREE_LEVEL; static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { @@ -448,6 +498,15 @@ static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { node->part_variances = NULL; vpx_memset(node->split, 0, sizeof(node->split)); switch (bsize) { +#if CONFIG_EXT_CODING_UNIT_SIZE + case BLOCK_128X128: { + v128x128 *vt = (v128x128 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } +#endif case BLOCK_64X64: { v64x64 *vt = (v64x64 *) data; node->part_variances = &vt->part_variances; @@ -600,6 +659,10 @@ static void choose_partitioning(VP9_COMP *cpi, vp9_clear_system_state(); vp9_zero(vt); set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST); +#if CONFIG_EXT_CODING_UNIT_SIZE + printf("Not yet implemented: choose_partitioning\n"); + exit(-1); +#endif if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); @@ -2024,8 +2087,8 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, int i, pl; PARTITION_TYPE partition = PARTITION_NONE; BLOCK_SIZE subsize; - ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; - ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + ENTROPY_CONTEXT l[(CODING_UNIT_SIZE >> 2) * MAX_MB_PLANE]; + ENTROPY_CONTEXT a[(CODING_UNIT_SIZE >> 2) * MAX_MB_PLANE]; PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3]; RD_COST last_part_rdc, none_rdc, chosen_rdc; BLOCK_SIZE sub_subsize = BLOCK_4X4; @@ -2275,8 +2338,8 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, #if CONFIG_SUPERTX int rt_nocoef = 0; #endif - ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; - ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + ENTROPY_CONTEXT l[(CODING_UNIT_SIZE >> 2) * MAX_MB_PLANE]; + ENTROPY_CONTEXT a[(CODING_UNIT_SIZE >> 2) * MAX_MB_PLANE]; PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3]; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) @@ -2385,7 +2448,10 @@ static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, - BLOCK_16X16 + BLOCK_16X16, +#if CONFIG_EXT_CODING_UNIT_SIZE + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 +#endif }; static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { @@ -2393,7 +2459,10 @@ static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, - BLOCK_64X64 + BLOCK_64X64, +#if CONFIG_EXT_CODING_UNIT_SIZE + BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 +#endif }; // Look at all the mode_info entries for blocks that are part of this @@ -2431,7 +2500,10 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, - BLOCK_64X64 + BLOCK_64X64, +#if CONFIG_EXT_CODING_UNIT_SIZE + BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 +#endif }; // Look at neighboring blocks and set a min and max partition size based on @@ -2869,8 +2941,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; - ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; - ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + ENTROPY_CONTEXT l[(CODING_UNIT_SIZE >> 2) * MAX_MB_PLANE]; + ENTROPY_CONTEXT a[(CODING_UNIT_SIZE >> 2) * MAX_MB_PLANE]; PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3]; TOKENEXTRA *tp_orig = *tp; PICK_MODE_CONTEXT *ctx = &pc_tree->none; @@ -3799,7 +3871,11 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->mb.e_mbd; SPEED_FEATURES *const sf = &cpi->sf; int mi_col; +#if CONFIG_EXT_CODING_UNIT_SIZE + const int leaf_nodes = 64 * 4; +#else const int leaf_nodes = 64; +#endif // Initialize the left context for the new SB row vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 0de65bfcb..138e21f76 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -779,6 +779,19 @@ static void fnname##_bits12(const uint8_t *src_ptr, \ sad_array[i] >>= 4; \ } +#if CONFIG_EXT_CODING_UNIT_SIZE +MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad128x128) +MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad128x128_avg) +MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad128x128x3) +MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad128x128x8) +MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad128x128x4d) +MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad128x64) +MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad128x64_avg) +MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad128x64x4d) +MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad64x128) +MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad64x128_avg) +MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad64x128x4d) +#endif MAKE_BFP_SAD_WRAPPER(vp9_highbd_sad32x16) MAKE_BFP_SADAVG_WRAPPER(vp9_highbd_sad32x16_avg) MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad32x16x4d) @@ -1020,6 +1033,38 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vp9_highbd_sad4x4x8_bits8, vp9_highbd_sad4x4x4d_bits8) +#if CONFIG_EXT_CODING_UNIT_SIZE + HIGHBD_BFP(BLOCK_128X128, + vp9_highbd_sad128x128_bits8, + vp9_highbd_sad128x128_avg_bits8, + vp9_highbd_variance128x128, + vp9_highbd_sub_pixel_variance128x128, + vp9_highbd_sub_pixel_avg_variance128x128, + vp9_highbd_sad128x128x3_bits8, + vp9_highbd_sad128x128x8_bits8, + vp9_highbd_sad128x128x4d_bits8) + + HIGHBD_BFP(BLOCK_128X64, + vp9_highbd_sad128x64_bits8, + vp9_highbd_sad128x64_avg_bits8, + vp9_highbd_variance128x64, + vp9_highbd_sub_pixel_variance128x64, + vp9_highbd_sub_pixel_avg_variance128x64, + NULL, + NULL, + vp9_highbd_sad128x64x4d_bits8) + + HIGHBD_BFP(BLOCK_64X128, + vp9_highbd_sad64x128_bits8, + vp9_highbd_sad64x128_avg_bits8, + vp9_highbd_variance64x128, + vp9_highbd_sub_pixel_variance64x128, + vp9_highbd_sub_pixel_avg_variance64x128, + NULL, + NULL, + vp9_highbd_sad64x128x4d_bits8) +#endif + #if CONFIG_WEDGE_PARTITION HIGHBD_MBFP(BLOCK_64X64, vp9_highbd_masked_sad64x64_bits8, @@ -1207,6 +1252,38 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vp9_highbd_sad4x4x8_bits10, vp9_highbd_sad4x4x4d_bits10) +#if CONFIG_EXT_CODING_UNIT_SIZE + HIGHBD_BFP(BLOCK_128X128, + vp9_highbd_sad128x128_bits10, + vp9_highbd_sad128x128_avg_bits10, + vp9_highbd_10_variance128x128, + vp9_highbd_10_sub_pixel_variance128x128, + vp9_highbd_10_sub_pixel_avg_variance128x128, + vp9_highbd_sad128x128x3_bits10, + vp9_highbd_sad128x128x8_bits10, + vp9_highbd_sad128x128x4d_bits10) + + HIGHBD_BFP(BLOCK_128X64, + vp9_highbd_sad128x64_bits10, + vp9_highbd_sad128x64_avg_bits10, + vp9_highbd_10_variance128x64, + vp9_highbd_10_sub_pixel_variance128x64, + vp9_highbd_10_sub_pixel_avg_variance128x64, + NULL, + NULL, + vp9_highbd_sad128x64x4d_bits10) + + HIGHBD_BFP(BLOCK_64X128, + vp9_highbd_sad64x128_bits10, + vp9_highbd_sad64x128_avg_bits10, + vp9_highbd_10_variance64x128, + vp9_highbd_10_sub_pixel_variance64x128, + vp9_highbd_10_sub_pixel_avg_variance64x128, + NULL, + NULL, + vp9_highbd_sad64x128x4d_bits10) +#endif + #if CONFIG_WEDGE_PARTITION HIGHBD_MBFP(BLOCK_64X64, vp9_highbd_masked_sad64x64_bits10, @@ -1394,6 +1471,38 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vp9_highbd_sad4x4x8_bits12, vp9_highbd_sad4x4x4d_bits12) +#if CONFIG_EXT_CODING_UNIT_SIZE + HIGHBD_BFP(BLOCK_128X128, + vp9_highbd_sad128x128_bits12, + vp9_highbd_sad128x128_avg_bits12, + vp9_highbd_12_variance128x128, + vp9_highbd_12_sub_pixel_variance128x128, + vp9_highbd_12_sub_pixel_avg_variance128x128, + vp9_highbd_sad128x128x3_bits12, + vp9_highbd_sad128x128x8_bits12, + vp9_highbd_sad128x128x4d_bits12) + + HIGHBD_BFP(BLOCK_128X64, + vp9_highbd_sad128x64_bits12, + vp9_highbd_sad128x64_avg_bits12, + vp9_highbd_12_variance128x64, + vp9_highbd_12_sub_pixel_variance128x64, + vp9_highbd_12_sub_pixel_avg_variance128x64, + NULL, + NULL, + vp9_highbd_sad128x64x4d_bits12) + + HIGHBD_BFP(BLOCK_64X128, + vp9_highbd_sad64x128_bits12, + vp9_highbd_sad64x128_avg_bits12, + vp9_highbd_12_variance64x128, + vp9_highbd_12_sub_pixel_variance64x128, + vp9_highbd_12_sub_pixel_avg_variance64x128, + NULL, + NULL, + vp9_highbd_sad64x128x4d_bits12) +#endif + #if CONFIG_WEDGE_PARTITION HIGHBD_MBFP(BLOCK_64X64, vp9_highbd_masked_sad64x64_bits12, @@ -1815,6 +1924,21 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cpi->fn_ptr[BT].sdx8f = SDX8F; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; +#if CONFIG_EXT_CODING_UNIT_SIZE + BFP(BLOCK_128X128, vp9_sad128x128, vp9_sad128x128_avg, + vp9_variance128x128, vp9_sub_pixel_variance128x128, + vp9_sub_pixel_avg_variance128x128, vp9_sad128x128x3, vp9_sad128x128x8, + vp9_sad128x128x4d) + + BFP(BLOCK_128X64, vp9_sad128x64, vp9_sad128x64_avg, + vp9_variance128x64, vp9_sub_pixel_variance128x64, + vp9_sub_pixel_avg_variance128x64, NULL, NULL, vp9_sad128x64x4d) + + BFP(BLOCK_64X128, vp9_sad64x128, vp9_sad64x128_avg, + vp9_variance64x128, vp9_sub_pixel_variance64x128, + vp9_sub_pixel_avg_variance64x128, NULL, NULL, vp9_sad64x128x4d) +#endif + BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg, vp9_variance32x16, vp9_sub_pixel_variance32x16, vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d) diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 2179ba42d..e81032ce7 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -61,7 +61,10 @@ void vp9_rd_cost_init(RD_COST *rd_cost) { // This table is used to correct for block size. // The factors here are << 2 (2 = x0.5, 32 = x8 etc). static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = { - 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32 + 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, +#if CONFIG_EXT_CODING_UNIT_SIZE + 48, 48, 64 +#endif }; static void fill_mode_costs(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 16835ce51..0597c2cb0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -7910,7 +7910,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE best_tx_size; int rate2_tx, this_skip2_tx = 0; int64_t distortion2_tx, bestrd_tx = INT64_MAX; - uint8_t tmp_zcoeff_blk[(CODING_UNIT_SIZE * CODING_UNIT_SIZE) / 16]; + uint8_t tmp_zcoeff_blk[(CODING_UNIT_SIZE * CODING_UNIT_SIZE) >> 4]; #endif // CONFIG_EXT_TX *mbmi = *inter_ref_list[copy_mode - REF0]; diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c index f926a5500..9081dd765 100644 --- a/vp9/encoder/vp9_sad.c +++ b/vp9/encoder/vp9_sad.c @@ -66,6 +66,22 @@ void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \ } +#if CONFIG_EXT_CODING_UNIT_SIZE +// 128x128 +sadMxN(128, 128) +sadMxNxK(128, 128, 3) +sadMxNxK(128, 128, 8) +sadMxNx4D(128, 128) + +// 128x64 +sadMxN(128, 64) +sadMxNx4D(128, 64) + +// 64x128 +sadMxN(64, 128) +sadMxNx4D(64, 128) +#endif + // 64x64 sadMxN(64, 64) sadMxNxK(64, 64, 3) @@ -205,6 +221,22 @@ void vp9_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ } \ } +#if CONFIG_EXT_CODING_UNIT_SIZE +// 128x128 +highbd_sadMxN(128, 128) +highbd_sadMxNxK(128, 128, 3) +highbd_sadMxNxK(128, 128, 8) +highbd_sadMxNx4D(128, 128) + +// 128x64 +highbd_sadMxN(128, 64) +highbd_sadMxNx4D(128, 64) + +// 64x128 +highbd_sadMxN(64, 128) +highbd_sadMxNx4D(64, 128) +#endif + // 64x64 highbd_sadMxN(64, 64) highbd_sadMxNxK(64, 64, 3) diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 36f02da3f..ad0cc36ee 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -253,6 +253,20 @@ VAR(64, 64) SUBPIX_VAR(64, 64) SUBPIX_AVG_VAR(64, 64) +#if CONFIG_EXT_CODING_UNIT_SIZE +VAR(64, 128) +SUBPIX_VAR(64, 128) +SUBPIX_AVG_VAR(64, 128) + +VAR(128, 64) +SUBPIX_VAR(128, 64) +SUBPIX_AVG_VAR(128, 64) + +VAR(128, 128) +SUBPIX_VAR(128, 128) +SUBPIX_AVG_VAR(128, 128) +#endif + void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { int i, j; @@ -632,6 +646,20 @@ HIGHBD_VAR(64, 64) HIGHBD_SUBPIX_VAR(64, 64) HIGHBD_SUBPIX_AVG_VAR(64, 64) +#if CONFIG_EXT_CODING_UNIT_SIZE +HIGHBD_VAR(64, 128) +HIGHBD_SUBPIX_VAR(64, 128) +HIGHBD_SUBPIX_AVG_VAR(64, 128) + +HIGHBD_VAR(128, 64) +HIGHBD_SUBPIX_VAR(128, 64) +HIGHBD_SUBPIX_AVG_VAR(128, 64) + +HIGHBD_VAR(128, 128) +HIGHBD_SUBPIX_VAR(128, 128) +HIGHBD_SUBPIX_AVG_VAR(128, 128) +#endif + void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride) { diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c index bf5fa889f..1a83f9ccd 100644 --- a/vp9/encoder/x86/vp9_denoiser_sse2.c +++ b/vp9/encoder/x86/vp9_denoiser_sse2.c @@ -460,9 +460,16 @@ int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride, avg, avg_stride, increase_denoising, bs, motion_magnitude); +#if CONFIG_EXT_CODING_UNIT_SIZE + } else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 || + bs == BLOCK_32X16 || bs == BLOCK_32X32 || bs == BLOCK_32X64 || + bs == BLOCK_64X32 || bs == BLOCK_64X64 || bs == BLOCK_64X128 || + bs == BLOCK_128X64 || bs == BLOCK_128X128) { +#else } else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 || bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 || bs == BLOCK_64X32 || bs == BLOCK_64X64) { +#endif return vp9_denoiser_64_32_16xM_sse2(sig, sig_stride, mc_avg, mc_avg_stride, avg, avg_stride, -- 2.40.0