]> granicus.if.org Git - libvpx/commitdiff
Add Neon intrinsic vp9_fdct8x8_quant_neon
authorFrank Galligan <fgalligan@google.com>
Fri, 16 Jan 2015 03:29:46 +0000 (19:29 -0800)
committerFrank Galligan <fgalligan@google.com>
Sun, 25 Jan 2015 06:49:50 +0000 (22:49 -0800)
On Nexus 7 speed -5 got ~2%, -6 got ~15%, -7 and -8 got ~30%
increase in perf.

Tested on Nexus 7, built with ndk r10d, gcc 4.9.

Change-Id: I83246d63b96674d170098a572fa4fe28a05aaf51

vp9/common/vp9_rtcd_defs.pl
vp9/encoder/arm/neon/vp9_dct_neon.c

index 947d8c7bbd9de9016b9f3099d2fb05dcf9f9892f..12f076fed9b9e4fb2f9ff8245bad0eb498504868 100644 (file)
@@ -1166,7 +1166,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
 
   add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vp9_fdct8x8_quant sse2 ssse3/;
+  specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
 }
 
 #
index 6c66f5d5bc98f263878e29de7c5d00cc076ed7d2..a6d4797adae76616add870a09e4d93c41188ff2f 100644 (file)
@@ -32,6 +32,24 @@ void vp9_fdct8x8_1_neon(const int16_t *input, int16_t *output, int stride) {
   }
 }
 
+void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
+                            int16_t* coeff_ptr, intptr_t n_coeffs,
+                            int skip_block, const int16_t* zbin_ptr,
+                            const int16_t* round_ptr, const int16_t* quant_ptr,
+                            const int16_t* quant_shift_ptr,
+                            int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr,
+                            const int16_t* dequant_ptr, uint16_t* eob_ptr,
+                            const int16_t* scan_ptr,
+                            const int16_t* iscan_ptr) {
+  int16_t temp_buffer[64];
+  (void)coeff_ptr;
+
+  vp9_fdct8x8_neon(input, temp_buffer, stride);
+  vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, zbin_ptr, round_ptr,
+                       quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
+                       dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
+}
+
 void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
   int i;
   // stage 1