granicus.if.org Git - libvpx/commitdiff
SSE2 optim of vp9_subtract_block for 128x128 units
author Julia Robson <juliamrobson@gmail.com>
Thu, 12 Nov 2015 12:15:09 +0000 (12:15 +0000)
committer Debargha Mukherjee <debargha@google.com>
Fri, 13 Nov 2015 19:12:56 +0000 (11:12 -0800)
Extend the SSE2 implementation of vp9_subtract_block to work
with the 128x128 coding unit experiment (CONFIG_EXT_CODING_UNIT_SIZE).

Change-Id: Ib3cc16bf5801ef2c7eecc19d3cc07a8c50631580

test/vp9_subtract_test.cc
vp9/common/vp9_rtcd_defs.pl
vp9/encoder/x86/vp9_subtract_sse2.asm

index 92dc476b5d7d0767e63762eb49fa1ecd0905197d..fabb43824751988fcd08b623b5dda062ad77de06 100644 (file)
@@ -91,7 +91,7 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
 INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
                         ::testing::Values(vp9_subtract_block_c));
 
-#if HAVE_SSE2 && CONFIG_USE_X86INC && !CONFIG_EXT_CODING_UNIT_SIZE
+#if HAVE_SSE2 && CONFIG_USE_X86INC
 INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
                         ::testing::Values(vp9_subtract_block_sse2));
 #endif
index e7e4e9a8016188ed8a12ba116241ed1c35da985f..cdb24ed44c7f1dcc04cf62549c9d06c568ea5ea5 100644 (file)
@@ -1566,14 +1566,9 @@ if (vpx_config("CONFIG_WEDGE_PARTITION") eq "yes") {
 }
 # ENCODEMB INVOKE
 
-if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") {
-add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
-specialize qw/vp9_subtract_block/;
+  add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
+  specialize qw/vp9_subtract_block neon/, "$sse2_x86inc";
 
-}else{
-add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
-specialize qw/vp9_subtract_block neon/, "$sse2_x86inc";
-}
 #
 # Denoiser
 #
index 982408083528c7bf68b0ff2848105457b57085c3..cf0f7476483458c3df7a01b0750547eda77fab99 100644 (file)
@@ -31,6 +31,10 @@ cglobal subtract_block, 7, 7, 8, \
   je .case_16
   cmp                colsd, 32
   je .case_32
+%if CONFIG_EXT_CODING_UNIT_SIZE
+  cmp                colsd, 64
+  je .case_64
+%endif
 
 %macro loop16 6
   mova                  m0, [srcq+%1]
@@ -55,6 +59,22 @@ cglobal subtract_block, 7, 7, 8, \
   mova [diffq+mmsize*1+%6], m1
 %endmacro
 
+%if CONFIG_EXT_CODING_UNIT_SIZE
+  mov             pred_str, pred_stridemp
+.loop_128:
+  loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize,  0*mmsize,  2*mmsize
+  loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize,  4*mmsize,  6*mmsize
+  loop16 4*mmsize, 5*mmsize, 4*mmsize, 5*mmsize,  8*mmsize, 10*mmsize
+  loop16 6*mmsize, 7*mmsize, 6*mmsize, 7*mmsize, 12*mmsize, 14*mmsize
+  lea                diffq, [diffq+diff_strideq*2]
+  add                predq, pred_str
+  add                 srcq, src_strideq
+  sub                rowsd, 1
+  jnz .loop_128
+  RET
+
+.case_64:
+%endif
   mov             pred_str, pred_stridemp
 .loop_64:
   loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize