granicus.if.org Git - libvpx/commitdiff
Use fdct8x4 instead of fdct4x4 where the block size allows it.
author: Ronald S. Bultje <rbultje@google.com>
Wed, 6 Feb 2013 22:13:05 +0000 (14:13 -0800)
committer: Ronald S. Bultje <rbultje@google.com>
Thu, 7 Feb 2013 00:13:02 +0000 (16:13 -0800)
This allows for faster SIMD implementations in the future (currently
there is no speed impact).

Change-Id: I732647e9148b5dcb44e6bc8728138f0141218329

vp9/encoder/vp9_encodeintra.c
vp9/encoder/vp9_encodemb.c
vp9/encoder/vp9_rdopt.c

index 09ea045d7316609a6600528b1a54625dba117d45..eacc2cd283625117e97bdc9af653f6c349e33eed 100644 (file)
@@ -168,6 +168,12 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
         vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
         vp9_ht_quantize_b_4x4(be, b, tx_type);
         vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
+      } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
+        x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+        x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
+        vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
+        vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);
+        i++;
       } else {
         x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
         x->quantize_b_4x4(be, b);
index 45278a71b04d888ad766396c7d3f49c783ac84eb..52eabf1298ee28a4f4129a90f2dd556fcdbc5c44 100644 (file)
@@ -187,6 +187,10 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
     if (tx_type != DCT_DCT) {
       assert(has_2nd_order == 0);
       vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
+    } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {
+      x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+                           &x->block[i].coeff[0], 32);
+      i++;
     } else {
       x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
                            &x->block[i].coeff[0], 32);
index 2868db55f9fb13e7cdb84fd24b96632ef220a817..5324db530f1e11fa049212b28f17d2cd8f860d72 100644 (file)
@@ -1459,21 +1459,33 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
       distortion = 0;
       rate_t = 0;
       for (i = 0; i < 4; ++i) {
+        int do_two = 0;
         b = &xd->block[ib + iblock[i]];
         be = &x->block[ib + iblock[i]];
         tx_type = get_tx_type_4x4(xd, b);
         if (tx_type != DCT_DCT) {
           vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
           vp9_ht_quantize_b_4x4(be, b, tx_type);
+        } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
+          x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+          x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
+          do_two = 1;
         } else {
           x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
           x->quantize_b_4x4(be, b);
         }
-        distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16);
+        distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
         rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC,
                               // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0,
                               &ta0, &tl0,
                               TX_4X4);
+        if (do_two) {
+          rate_t += cost_coeffs(x, b + 1, PLANE_TYPE_Y_WITH_DC,
+                                // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0,
+                                &ta0, &tl0,
+                                TX_4X4);
+          i++;
+        }
       }
       b = &xd->block[ib];
       be = &x->block[ib];