Add 64x variance Neon functions

author Frank Galligan <fgalligan@google.com>

Tue, 13 Jan 2015 19:15:24 +0000 (11:15 -0800)

committer Frank Galligan <fgalligan@google.com>

Tue, 13 Jan 2015 23:08:13 +0000 (15:08 -0800)
author Frank Galligan <fgalligan@google.com>
Tue, 13 Jan 2015 19:15:24 +0000 (11:15 -0800)
committer Frank Galligan <fgalligan@google.com>
Tue, 13 Jan 2015 23:08:13 +0000 (15:08 -0800)
diff --git a/test/variance_test.cc b/test/variance_test.cc

index 4d279f686dfd5316891ea81a10aa68a473be729c..e7517a7d807cbf2bd801ce5916e69f367200fc87 100644 (file)
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1914,11 +1914,17 @@ INSTANTIATE_TEST_CASE_P(
  const vp9_variance_fn_t variance8x8_neon = vp9_variance8x8_neon;
  const vp9_variance_fn_t variance16x16_neon = vp9_variance16x16_neon;
  const vp9_variance_fn_t variance32x32_neon = vp9_variance32x32_neon;
+const vp9_variance_fn_t variance32x64_neon = vp9_variance32x64_neon;
+const vp9_variance_fn_t variance64x32_neon = vp9_variance64x32_neon;
+const vp9_variance_fn_t variance64x64_neon = vp9_variance64x64_neon;
  INSTANTIATE_TEST_CASE_P(
      NEON, VP9VarianceTest,
      ::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
                        make_tuple(4, 4, variance16x16_neon, 0),
-                      make_tuple(5, 5, variance32x32_neon, 0)));
+                      make_tuple(5, 5, variance32x32_neon, 0),
+                      make_tuple(5, 6, variance32x64_neon, 0),
+                      make_tuple(6, 5, variance64x32_neon, 0),
+                      make_tuple(6, 6, variance64x64_neon, 0)));
  
  const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
      vp9_sub_pixel_variance8x8_neon;
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index 88f85a86d3ed5c282a3ec16af993b13d4488d1c6..b59e6ebe794b146a4585bb6ff128f735b9a95d89 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -798,16 +798,16 @@ add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int sourc
  specialize qw/vp9_variance16x32/, "$sse2_x86inc";
  
  add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x32 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance64x32 avx2 neon/, "$sse2_x86inc";
  
  add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x64/, "$sse2_x86inc";
+specialize qw/vp9_variance32x64 neon/, "$sse2_x86inc";
  
  add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vp9_variance32x32 avx2 neon/, "$sse2_x86inc";
  
  add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x64 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance64x64 avx2 neon/, "$sse2_x86inc";
  
  add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vp9_variance16x16 avx2 neon/, "$sse2_x86inc";
diff --git a/vp9/encoder/arm/neon/vp9_variance_neon.c b/vp9/encoder/arm/neon/vp9_variance_neon.c

index 816fbda1fbe3e60c0647fbf37104144fd0811358..567b7deb17d636e12149dff15e9df731d560bdaa 100644 (file)
--- a/vp9/encoder/arm/neon/vp9_variance_neon.c
+++ b/vp9/encoder/arm/neon/vp9_variance_neon.c
@@ -10,6 +10,7 @@
  
  #include <arm_neon.h>
  #include "./vp9_rtcd.h"
+#include "./vpx_config.h"
  
  #include "vpx_ports/mem.h"
  #include "vpx/vpx_integer.h"
@@ -28,6 +29,8 @@ enum { kHeight16PlusOne = 17 };
  enum { kWidth32 = 32 };
  enum { kHeight32 = 32 };
  enum { kHeight32PlusOne = 33 };
+enum { kWidth64 = 64 };
+enum { kHeight64 = 64 };
  enum { kPixelStepOne = 1 };
  enum { kAlign16 = 16 };
  
@@ -208,6 +211,30 @@ unsigned int vp9_variance32x32_neon(const uint8_t *a, int a_stride,
    return *sse - (((int64_t)sum * sum) / (kWidth32 * kHeight32));
  }
  
+unsigned int vp9_variance32x64_neon(const uint8_t *a, int a_stride,
+                                    const uint8_t *b, int b_stride,
+                                    unsigned int *sse) {
+  int sum;
+  variance_neon_w8(a, a_stride, b, b_stride, kWidth32, kHeight64, sse, &sum);
+  return *sse - (((int64_t)sum * sum) >> 11);  // >> 11 = / (32 * 64)
+}
+
+unsigned int vp9_variance64x32_neon(const uint8_t *a, int a_stride,
+                                    const uint8_t *b, int b_stride,
+                                    unsigned int *sse) {
+  int sum;
+  variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight32, sse, &sum);
+  return *sse - (((int64_t)sum * sum) >> 11);  // >> 11 = / (64 * 32)
+}
+
+unsigned int vp9_variance64x64_neon(const uint8_t *a, int a_stride,
+                                    const uint8_t *b, int b_stride,
+                                    unsigned int *sse) {
+  int sum;
+  variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight64, sse, &sum);
+  return *sse - (((int64_t)sum * sum) >> 12);  // >> 12 = / (64 * 64)
+}
+
  unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
                                                int src_stride,
                                                int xoffset,
author	Frank Galligan <fgalligan@google.com>
	Tue, 13 Jan 2015 19:15:24 +0000 (11:15 -0800)
committer	Frank Galligan <fgalligan@google.com>
	Tue, 13 Jan 2015 23:08:13 +0000 (15:08 -0800)
test/variance_test.cc		patch \| blob \| history
vp9/common/vp9_rtcd_defs.pl		patch \| blob \| history
vp9/encoder/arm/neon/vp9_variance_neon.c		patch \| blob \| history