Rd fixes and cleanups

author Debargha Mukherjee <debargha@google.com>

Wed, 7 Sep 2016 17:08:13 +0000 (10:08 -0700)

committer Debargha Mukherjee <debargha@google.com>

Thu, 8 Sep 2016 22:48:05 +0000 (15:48 -0700)
author Debargha Mukherjee <debargha@google.com>
Wed, 7 Sep 2016 17:08:13 +0000 (10:08 -0700)
committer Debargha Mukherjee <debargha@google.com>
Thu, 8 Sep 2016 22:48:05 +0000 (15:48 -0700)
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c

index f27a100947b453442675ee6de27b2489689d3a02..2980fad1d42c090d2259ae74f9a4001217842247 100644 (file)
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1062,16 +1062,10 @@ static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
    }
  }
  
-static int rate_block(int plane, int block, int blk_row, int blk_col,
-                      int coeff_ctx, TX_SIZE tx_size,
+static int rate_block(int plane, int block, int coeff_ctx, TX_SIZE tx_size,
                        struct rdcost_block_args *args) {
-  int coeff_cost =
-      cost_coeffs(args->x, plane, block, coeff_ctx, tx_size, args->so->scan,
-                  args->so->neighbors, args->use_fast_coef_costing);
-  const struct macroblock_plane *p = &args->x->plane[plane];
-  *(args->t_above + blk_col) = !(p->eobs[block] == 0);
-  *(args->t_left + blk_row) = !(p->eobs[block] == 0);
-  return coeff_cost;
+  return cost_coeffs(args->x, plane, block, coeff_ctx, tx_size, args->so->scan,
+                     args->so->neighbors, args->use_fast_coef_costing);
  }
  
  static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
@@ -1189,7 +1183,10 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
      return;
    }
  
-  rate = rate_block(plane, block, blk_row, blk_col, coeff_ctx, tx_size, args);
+  rate = rate_block(plane, block, coeff_ctx, tx_size, args);
+  args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0);
+  args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0);
+
    rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
    rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
  
@@ -1877,7 +1874,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
                                       const int *bmode_costs, ENTROPY_CONTEXT *a,
                                       ENTROPY_CONTEXT *l, int *bestrate,
                                       int *bestratey, int64_t *bestdistortion,
-                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
+                                     BLOCK_SIZE bsize, int *y_skip,
+                                     int64_t rd_thresh) {
    PREDICTION_MODE mode;
    MACROBLOCKD *const xd = &x->e_mbd;
    int64_t best_rd = rd_thresh;
@@ -1892,6 +1890,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
    const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
    const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
    int idx, idy;
+  int best_can_skip = 0;
    uint8_t best_dst[8 * 8];
  #if CONFIG_AOM_HIGHBITDEPTH
    uint16_t best_dst16[8 * 8];
@@ -1909,6 +1908,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
        int ratey = 0;
        int64_t distortion = 0;
        int rate = bmode_costs[mode];
+      int can_skip = 1;
  
        if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
  
@@ -1949,6 +1949,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
                                   so->neighbors, cpi->sf.use_fast_coef_costing);
              *(tempa + idx) = !(p->eobs[block] == 0);
              *(templ + idy) = !(p->eobs[block] == 0);
+            can_skip &= (p->eobs[block] == 0);
              if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
                goto next_highbd;
              av1_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
@@ -1973,6 +1974,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
                                   so->neighbors, cpi->sf.use_fast_coef_costing);
              *(tempa + idx) = !(p->eobs[block] == 0);
              *(templ + idy) = !(p->eobs[block] == 0);
+            can_skip &= (p->eobs[block] == 0);
              av1_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
                                          dst_stride, p->eobs[block], xd->bd,
                                          tx_type, 0);
@@ -1993,6 +1995,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
          *bestratey = ratey;
          *bestdistortion = distortion;
          best_rd = this_rd;
+        best_can_skip = can_skip;
          *best_mode = mode;
          memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
          memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
@@ -2007,6 +2010,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
  
      if (best_rd >= rd_thresh) return best_rd;
  
+    if (y_skip) *y_skip &= best_can_skip;
+
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
        memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
               best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
@@ -2021,6 +2026,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];
+    int can_skip = 1;
  
      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
  
@@ -2061,6 +2067,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            *(tempa + idx) = !(p->eobs[block] == 0);
            *(templ + idy) = !(p->eobs[block] == 0);
+          can_skip &= (p->eobs[block] == 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next;
            av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
@@ -2084,6 +2091,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            *(tempa + idx) = !(p->eobs[block] == 0);
            *(templ + idy) = !(p->eobs[block] == 0);
+          can_skip &= (p->eobs[block] == 0);
            av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
                                 dst_stride, p->eobs[block], tx_type, 0);
            cpi->fn_ptr[BLOCK_4X4].vf(src, src_stride, dst, dst_stride, &tmp);
@@ -2106,6 +2114,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
+      best_can_skip = can_skip;
        *best_mode = mode;
        memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
        memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
@@ -2118,6 +2127,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
  
    if (best_rd >= rd_thresh) return best_rd;
  
+  if (y_skip) *y_skip &= best_can_skip;
+
    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
      memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
             num_4x4_blocks_wide * 4);
@@ -2127,7 +2138,7 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
  
  static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb,
                                              int *rate, int *rate_y,
-                                            int64_t *distortion,
+                                            int64_t *distortion, int *y_skip,
                                              int64_t best_rd) {
    int i, j;
    const MACROBLOCKD *const xd = &mb->e_mbd;
@@ -2154,6 +2165,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb,
    mic->mbmi.tx_type = DCT_DCT;
    mic->mbmi.tx_size = TX_4X4;
  
+  if (y_skip) *y_skip = 1;
+
    // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
    for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
      for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -2171,7 +2184,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb,
        this_rd = rd_pick_intra4x4block(
            cpi, mb, idy, idx, &best_mode, bmode_costs,
            xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
-          &ry, &d, bsize, best_rd - total_rd);
+          &ry, &d, bsize, y_skip, best_rd - total_rd);
        if (this_rd >= best_rd - total_rd) return INT64_MAX;
  
        total_rd += this_rd;
@@ -7810,9 +7823,8 @@ void av1_rd_pick_intra_mode_sb(AV1_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
        return;
      }
    } else {
-    y_skip = 0;
      if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
-                                     &dist_y, best_rd) >= best_rd) {
+                                     &dist_y, &y_skip, best_rd) >= best_rd) {
        rd_cost->rate = INT_MAX;
        return;
      }
@@ -10176,7 +10188,7 @@ void av1_rd_pick_inter_mode_sub8x8(struct AV1_COMP *cpi, TileDataEnc *tile_data,
      if (ref_frame == INTRA_FRAME) {
        int rate;
        if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y,
-                                       best_rd) >= best_rd)
+                                       NULL, best_rd) >= best_rd)
          continue;
        rate2 += rate;
        rate2 += intra_cost_penalty;
author	Debargha Mukherjee <debargha@google.com>
	Wed, 7 Sep 2016 17:08:13 +0000 (10:08 -0700)
committer	Debargha Mukherjee <debargha@google.com>
	Thu, 8 Sep 2016 22:48:05 +0000 (15:48 -0700)