granicus.if.org Git - libvpx/commitdiff
SSE2 inverse 4x4 2D-DCT with DC only
author Jingning Han <jingning@google.com>
Wed, 24 Jul 2013 22:41:43 +0000 (15:41 -0700)
committer Jingning Han <jingning@google.com>
Thu, 25 Jul 2013 06:19:56 +0000 (23:19 -0700)
Add an SSE2 implementation to handle the special case of the inverse 2D-DCT
where only the DC coefficient is non-zero.

Change-Id: I2c6a59e21e5e77b8cf39a4af5eecf4d5ade32e2f

vp9/common/vp9_rtcd_defs.sh
vp9/common/x86/vp9_idct_intrin_sse2.c

index f1789fb74e1ff2b8404a75b772ed0a6398b410b6..c357ef62a570098855b8c9d9e493c641505fde6c 100644 (file)
@@ -292,7 +292,7 @@ specialize vp9_convolve8_avg_vert ssse3 neon
 # dct
 #
 prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct4x4_1_add
+specialize vp9_short_idct4x4_1_add sse2
 
 prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
 specialize vp9_short_idct4x4_add sse2
index b4766df5bc65ba6b76bd3297b17e85842f889898..a1e14b482ab9603238ffc878d62f9f9eb5ab752c 100644 (file)
@@ -148,6 +148,23 @@ void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) {
   RECON_AND_STORE4X4(dest, input3);
 }
 
+void vp9_short_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+  __m128i dc_value;  // holds the scalar `a` replicated into every 16-bit lane
+  const __m128i zero = _mm_setzero_si128();  // not named again below; presumably read by RECON_AND_STORE4X4 -- confirm
+  int a;  // scalar derived from input[0] alone
+
+  a = dct_const_round_shift(input[0] * cospi_16_64);  // input[0] is the only coefficient read
+  a = dct_const_round_shift(a * cospi_16_64);  // same scale-and-round applied a second time
+  a = ROUND_POWER_OF_TWO(a, 4);  // NOTE(review): by its name, a rounded divide by 2^4 -- macro not visible here
+
+  dc_value = _mm_set1_epi16(a);  // intrinsic takes a short, so `a` is narrowed to 16 bits
+
+  RECON_AND_STORE4X4(dest, dc_value);  // macro defined outside this hunk; `stride` and `zero` are presumably used inside it -- confirm
+  RECON_AND_STORE4X4(dest, dc_value);  // same dc_value each time; presumably the macro advances dest per call -- confirm
+  RECON_AND_STORE4X4(dest, dc_value);
+  RECON_AND_STORE4X4(dest, dc_value);
+}
+
 void vp9_idct4_1d_sse2(int16_t *input, int16_t *output) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i c1 = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,