From 8f6b9431001b96fa9154271a13ea1d175ca2796f Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Mon, 3 Aug 2015 09:43:34 -0700
Subject: [PATCH] VPX: Add rtcd support for scaling.

Change-Id: If34bfb0d918967445aea7dc30cd7b55ebfedb1f2
---
 vp9/common/vp9_scale.c       | 49 +++++++++++++++++++-------------
 vpx_dsp/vpx_convolve.c       | 54 ++++++++++++++++++++++++++++++++++++
 vpx_dsp/vpx_dsp_rtcd_defs.pl | 18 ++++++++++++
 vpx_dsp/x86/convolve.h       | 10 +++----
 4 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/vp9/common/vp9_scale.c b/vp9/common/vp9_scale.c
index 8f5c72e7c..b763b925b 100644
--- a/vp9/common/vp9_scale.c
+++ b/vp9/common/vp9_scale.c
@@ -79,6 +79,7 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
   // applied in one direction only, and not at all for 0,0, seems to give the
   // best quality, but it may be worth trying an additional mode that does
   // do the filtering on full-pel.
+
   if (sf->x_step_q4 == 16) {
     if (sf->y_step_q4 == 16) {
       // No scaling in either direction.
@@ -90,35 +91,43 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
       sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
     } else {
       // No scaling in x direction. Must always scale in the y direction.
-      sf->predict[0][0][0] = vpx_convolve8_vert;
-      sf->predict[0][0][1] = vpx_convolve8_avg_vert;
-      sf->predict[0][1][0] = vpx_convolve8_vert;
-      sf->predict[0][1][1] = vpx_convolve8_avg_vert;
-      sf->predict[1][0][0] = vpx_convolve8;
-      sf->predict[1][0][1] = vpx_convolve8_avg;
+      sf->predict[0][0][0] = vpx_scaled_vert;
+      sf->predict[0][0][1] = vpx_scaled_avg_vert;
+      sf->predict[0][1][0] = vpx_scaled_vert;
+      sf->predict[0][1][1] = vpx_scaled_avg_vert;
+      sf->predict[1][0][0] = vpx_scaled_2d;
+      sf->predict[1][0][1] = vpx_scaled_avg_2d;
     }
   } else {
     if (sf->y_step_q4 == 16) {
       // No scaling in the y direction. Must always scale in the x direction.
-      sf->predict[0][0][0] = vpx_convolve8_horiz;
-      sf->predict[0][0][1] = vpx_convolve8_avg_horiz;
-      sf->predict[0][1][0] = vpx_convolve8;
-      sf->predict[0][1][1] = vpx_convolve8_avg;
-      sf->predict[1][0][0] = vpx_convolve8_horiz;
-      sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
+      sf->predict[0][0][0] = vpx_scaled_horiz;
+      sf->predict[0][0][1] = vpx_scaled_avg_horiz;
+      sf->predict[0][1][0] = vpx_scaled_2d;
+      sf->predict[0][1][1] = vpx_scaled_avg_2d;
+      sf->predict[1][0][0] = vpx_scaled_horiz;
+      sf->predict[1][0][1] = vpx_scaled_avg_horiz;
     } else {
       // Must always scale in both directions.
-      sf->predict[0][0][0] = vpx_convolve8;
-      sf->predict[0][0][1] = vpx_convolve8_avg;
-      sf->predict[0][1][0] = vpx_convolve8;
-      sf->predict[0][1][1] = vpx_convolve8_avg;
-      sf->predict[1][0][0] = vpx_convolve8;
-      sf->predict[1][0][1] = vpx_convolve8_avg;
+      sf->predict[0][0][0] = vpx_scaled_2d;
+      sf->predict[0][0][1] = vpx_scaled_avg_2d;
+      sf->predict[0][1][0] = vpx_scaled_2d;
+      sf->predict[0][1][1] = vpx_scaled_avg_2d;
+      sf->predict[1][0][0] = vpx_scaled_2d;
+      sf->predict[1][0][1] = vpx_scaled_avg_2d;
     }
   }
+
   // 2D subpel motion always gets filtered in both directions
-  sf->predict[1][1][0] = vpx_convolve8;
-  sf->predict[1][1][1] = vpx_convolve8_avg;
+
+  if ((sf->x_step_q4 != 16) || (sf->y_step_q4 != 16)) {
+    sf->predict[1][1][0] = vpx_scaled_2d;
+    sf->predict[1][1][1] = vpx_scaled_avg_2d;
+  } else {
+    sf->predict[1][1][0] = vpx_convolve8;
+    sf->predict[1][1][1] = vpx_convolve8_avg;
+  }
+
 #if CONFIG_VP9_HIGHBITDEPTH
   if (use_highbd) {
     if (sf->x_step_q4 == 16) {
diff --git a/vpx_dsp/vpx_convolve.c b/vpx_dsp/vpx_convolve.c
index f06da3d34..2d1c927cb 100644
--- a/vpx_dsp/vpx_convolve.c
+++ b/vpx_dsp/vpx_convolve.c
@@ -282,6 +282,60 @@ void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
+void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+                        uint8_t *dst, ptrdiff_t dst_stride,
+                        const int16_t *filter_x, int x_step_q4,
+                        const int16_t *filter_y, int y_step_q4,
+                        int w, int h) {  // Pass-through: args unchanged.
+  vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
+                        filter_y, y_step_q4, w, h);
+}
+
+void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+                       uint8_t *dst, ptrdiff_t dst_stride,
+                       const int16_t *filter_x, int x_step_q4,
+                       const int16_t *filter_y, int y_step_q4,
+                       int w, int h) {  // Pass-through: args unchanged.
+  vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
+                       filter_y, y_step_q4, w, h);
+}
+
+void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride,
+                     uint8_t *dst, ptrdiff_t dst_stride,
+                     const int16_t *filter_x, int x_step_q4,
+                     const int16_t *filter_y, int y_step_q4,
+                     int w, int h) {  // Pass-through: args unchanged.
+  vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
+                  filter_y, y_step_q4, w, h);
+}
+
+void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+                            uint8_t *dst, ptrdiff_t dst_stride,
+                            const int16_t *filter_x, int x_step_q4,
+                            const int16_t *filter_y, int y_step_q4,
+                            int w, int h) {  // Pass-through: args unchanged.
+  vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
+                            x_step_q4, filter_y, y_step_q4, w, h);
+}
+
+void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+                           uint8_t *dst, ptrdiff_t dst_stride,
+                           const int16_t *filter_x, int x_step_q4,
+                           const int16_t *filter_y, int y_step_q4,
+                           int w, int h) {  // Pass-through: args unchanged.
+  vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
+                           x_step_q4, filter_y, y_step_q4, w, h);
+}
+
+void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride,
+                         uint8_t *dst, ptrdiff_t dst_stride,
+                         const int16_t *filter_x, int x_step_q4,
+                         const int16_t *filter_y, int y_step_q4,
+                         int w, int h) {  // Pass-through: args unchanged.
+  vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
+                      filter_y, y_step_q4, w, h);
+}
+
 #if CONFIG_VP9_HIGHBITDEPTH
 static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                   uint8_t *dst8, ptrdiff_t dst_stride,
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index ca564bcf2..9e46467b3 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -398,6 +398,24 @@ specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/;
 add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/;
 
+add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_scaled_2d/;  # No arch-specific variants listed for the vpx_scaled_* group.
+
+add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_scaled_horiz/;
+
+add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_scaled_vert/;
+
+add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_scaled_avg_2d/;
+
+add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_scaled_avg_horiz/;
+
+add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_scaled_avg_vert/;
+
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   #
   # Sub Pixel Filters
diff --git a/vpx_dsp/x86/convolve.h b/vpx_dsp/x86/convolve.h
index c0144981b..a2e1faf90 100644
--- a/vpx_dsp/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -103,9 +103,9 @@ typedef void filter8_1dfunction (
     } \
   } \
   if (w) { \
-    vpx_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
-                             filter_x, x_step_q4, filter_y, y_step_q4, \
-                             w, h); \
+    vpx_scaled_##name(src, src_stride, dst, dst_stride, \
+                      filter_x, x_step_q4, filter_y, y_step_q4, \
+                      w, h); \
   } \
 }
 
@@ -137,8 +137,8 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                       y_step_q4, w, h); \
     } \
   } else { \
-    vpx_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
-                           filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
+    vpx_scaled_##avg##2d(src, src_stride, dst, dst_stride, \
+                         filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
   } \
 }
 
-- 
2.40.0