]> granicus.if.org Git - libvpx/commitdiff
Reclassify optimized ssim calculations as SSE2.
authorFritz Koenig <frkoenig@google.com>
Mon, 22 Aug 2011 19:36:28 +0000 (12:36 -0700)
committerFritz Koenig <frkoenig@google.com>
Mon, 22 Aug 2011 19:36:28 +0000 (12:36 -0700)
Calculations were incorrectly classified as either
SSE3 or SSSE3.  Only using SSE2 instructions.
Cleanup function names and make non-RTCD code work
as well.

Change-Id: I48ad0218af0cc51c5078070a08511dee43ecfe09

vp8/encoder/generic/csystemdependent.c
vp8/encoder/ssim.c
vp8/encoder/variance.h
vp8/encoder/x86/ssim_opt.asm
vp8/encoder/x86/variance_x86.h
vp8/encoder/x86/x86_csystemdependent.c

index 9906105545f0e06c6366209f4099f463d732d4fa..a14843a8095415923601da7edfbbbd0d919b9959 100644 (file)
@@ -94,16 +94,15 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
 #if !(CONFIG_REALTIME_ONLY)
     cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_c;
 #endif
+#if CONFIG_INTERNAL_STATS
+    cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_c;
+    cpi->rtcd.variance.ssimpf_16x16          = vp8_ssim_parms_16x16_c;
+#endif
 #endif
 
     // Pure C:
     vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
 
-#if CONFIG_INTERNAL_STATS
-    cpi->rtcd.variance.ssimpf_8x8            = ssim_parms_8x8_c;
-    cpi->rtcd.variance.ssimpf                = ssim_parms_c;
-#endif
-
 #if ARCH_X86 || ARCH_X86_64
     vp8_arch_x86_encoder_init(cpi);
 #endif
index fea756f7b9cf790ee945ffc00de58373ed245112..d0f8e490a4512c28799a4b31dad9df0cd58845da 100644 (file)
@@ -9,18 +9,9 @@
  */
 
 
-#include "vpx_scale/yv12config.h"
-#include "math.h"
 #include "onyx_int.h"
 
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x)  (x)
-#else
-#define IF_RTCD(x)  NULL
-#endif
-
-
-void ssim_parms_c
+void vp8_ssim_parms_16x16_c
 (
     unsigned char *s,
     int sp,
@@ -46,7 +37,7 @@ void ssim_parms_c
          }
      }
 }
-void ssim_parms_8x8_c
+void vp8_ssim_parms_8x8_c
 (
     unsigned char *s,
     int sp,
@@ -107,14 +98,14 @@ static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp,
             const vp8_variance_rtcd_vtable_t *rtcd)
 {
     unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
-    rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+    SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
     return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
 }
 static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp,
                 const vp8_variance_rtcd_vtable_t *rtcd)
 {
     unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
-    rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+    SSIMPF_INVOKE(rtcd,8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
     return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
 }
 
@@ -134,7 +125,7 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
     c1 = cc1*16;
     c2 = cc2*16;
 
-    rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+    SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
     ssim_n1 = (2*sum_s*sum_r+ c1);
 
     ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2);
index 5fd6d3ae0aeb19e0aab4b400ae48d7da2c5774e9..d9bf66975081a103426eed7825524065b4a377c0 100644 (file)
@@ -320,16 +320,16 @@ extern prototype_variance(vp8_variance_mse16x16);
 #endif
 extern prototype_get16x16prederror(vp8_variance_get4x4sse_cs);
 
-#ifndef vp8_ssimpf
-#define vp8_ssimpf ssim_parms_c
-#endif
-extern prototype_ssimpf(vp8_ssimpf)
-
 #ifndef vp8_ssimpf_8x8
-#define vp8_ssimpf_8x8 ssim_parms_8x8_c
+#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c
 #endif
 extern prototype_ssimpf(vp8_ssimpf_8x8)
 
+#ifndef vp8_ssimpf_16x16
+#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c
+#endif
+extern prototype_ssimpf(vp8_ssimpf_16x16)
+
 typedef prototype_sad(*vp8_sad_fn_t);
 typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
 typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
@@ -394,7 +394,7 @@ typedef struct
 
 #if CONFIG_INTERNAL_STATS
     vp8_ssimpf_fn_t          ssimpf_8x8;
-    vp8_ssimpf_fn_t          ssimpf;
+    vp8_ssimpf_fn_t          ssimpf_16x16;
 #endif
 
 } vp8_variance_rtcd_vtable_t;
@@ -417,8 +417,10 @@ typedef struct
 
 #if CONFIG_RUNTIME_CPU_DETECT
 #define VARIANCE_INVOKE(ctx,fn) (ctx)->fn
+#define SSIMPF_INVOKE(ctx,fn) (ctx)->ssimpf_##fn
 #else
 #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
+#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn
 #endif
 
 #endif
index d5d267a69847ba989b6cb4aaa17783043835df5d..8af4b4533fad1487d30120a2e8d34fa0b45ecff6 100644 (file)
@@ -44,7 +44,7 @@
         paddd           %1, xmm1
         SUM_ACROSS_Q    %1
 %endmacro
-;void ssim_parms_sse3(
+;void ssim_parms_sse2(
 ;    unsigned char *s,
 ;    int sp,
 ;    unsigned char *r,
@@ -61,8 +61,8 @@
 ; or pavgb At this point this is just meant to be first pass for calculating
 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
 ; in mode selection code.
-global sym(vp8_ssim_parms_16x16_sse3)
-sym(vp8_ssim_parms_16x16_sse3):
+global sym(vp8_ssim_parms_16x16_sse2)
+sym(vp8_ssim_parms_16x16_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 9
@@ -134,7 +134,7 @@ NextRow:
     pop         rbp
     ret
 
-;void ssim_parms_sse3(
+;void ssim_parms_sse2(
 ;    unsigned char *s,
 ;    int sp,
 ;    unsigned char *r,
@@ -151,8 +151,8 @@ NextRow:
 ; or pavgb At this point this is just meant to be first pass for calculating
 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
 ; in mode selection code.
-global sym(vp8_ssim_parms_8x8_sse3)
-sym(vp8_ssim_parms_8x8_sse3):
+global sym(vp8_ssim_parms_8x8_sse2)
+sym(vp8_ssim_parms_8x8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 9
index af6c4d27ed681191aceabd45c6a2ec4da443d006..4b41b5436c102c0831b4e8d1398dc949dd167495 100644 (file)
@@ -140,6 +140,8 @@ extern prototype_getmbss(vp8_get_mb_ss_sse2);
 extern prototype_variance(vp8_mse16x16_wmt);
 extern prototype_variance2(vp8_get8x8var_sse2);
 extern prototype_variance2(vp8_get16x16var_sse2);
+extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2)
+extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2)
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_variance_sad4x4
@@ -208,6 +210,14 @@ extern prototype_variance2(vp8_get16x16var_sse2);
 #undef  vp8_variance_mse16x16
 #define vp8_variance_mse16x16 vp8_mse16x16_wmt
 
+#if ARCH_X86_64
+#undef  vp8_ssimpf_8x8
+#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2
+
+#undef  vp8_ssimpf_16x16
+#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2
+#endif
+
 #endif
 #endif
 
index badb9f04451b040f08713ca76d0263af16e82bda..36b7b7194587c0d45cd3bb69137c56a5c0cde10a 100644 (file)
@@ -111,29 +111,6 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
 
 #endif
 
-#if HAVE_SSSE3
-#if CONFIG_INTERNAL_STATS
-#if ARCH_X86_64
-typedef void ssimpf
-(
-    unsigned char *s,
-    int sp,
-    unsigned char *r,
-    int rp,
-    unsigned long *sum_s,
-    unsigned long *sum_r,
-    unsigned long *sum_sq_s,
-    unsigned long *sum_sq_r,
-    unsigned long *sum_sxr
-);
-
-extern ssimpf vp8_ssim_parms_16x16_sse3;
-extern ssimpf vp8_ssim_parms_8x8_sse3;
-#endif
-#endif
-#endif
-
-
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 {
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -245,6 +222,13 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 
 #if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_sse2;
+#endif
+
+#if CONFIG_INTERNAL_STATS
+#if ARCH_X86_64
+        cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_sse2;
+        cpi->rtcd.variance.ssimpf_16x16          = vp8_ssim_parms_16x16_sse2;
+#endif
 #endif
     }
 #endif
@@ -280,14 +264,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3;
 
         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_ssse3;
-
-#if CONFIG_INTERNAL_STATS
-#if ARCH_X86_64
-        cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_sse3;
-        cpi->rtcd.variance.ssimpf                = vp8_ssim_parms_16x16_sse3;
-#endif
-#endif
-
     }
 #endif