cost_coefs approximation speed up

author Jim Bankoski <jimbankoski@google.com>

Mon, 3 Mar 2014 19:49:13 +0000 (11:49 -0800)

committer Jim Bankoski <jimbankoski@google.com>

Mon, 3 Mar 2014 19:49:13 +0000 (11:49 -0800)
author Jim Bankoski <jimbankoski@google.com>
Mon, 3 Mar 2014 19:49:13 +0000 (11:49 -0800)
committer Jim Bankoski <jimbankoski@google.com>
Mon, 3 Mar 2014 19:49:13 +0000 (11:49 -0800)
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c

index d7bac6f0bcd7b0d7646272cedc920987c4341efb..db0fd80fa1cc37a2537e1a99edcb0aa183cdcc5a 100644 (file)
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -662,6 +662,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
      sf->use_lp32x32fdct = 1;
      sf->subpel_iters_per_step = 1;
      sf->use_fast_coef_updates = 2;
+    sf->use_fast_coef_costing = 1;
  
      sf->adaptive_rd_thresh = 4;
      sf->mode_skip_start = 6;
@@ -698,6 +699,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
      sf->use_lp32x32fdct = 1;
      sf->subpel_iters_per_step = 1;
      sf->use_fast_coef_updates = 2;
+    sf->use_fast_coef_costing = 1;
  
      sf->adaptive_rd_thresh = 4;
      sf->mode_skip_start = 6;
@@ -730,6 +732,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
        sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
      }
      sf->use_fast_coef_updates = 2;
+    sf->use_fast_coef_costing = 1;
      sf->adaptive_rd_thresh = 4;
      sf->mode_skip_start = 6;
    }
@@ -742,6 +745,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
    sf->adaptive_rd_thresh = 1;
    sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
    sf->encode_breakout_thresh = 1;
+  sf->use_fast_coef_costing = 1;
  
    if (speed == 1) {
      sf->use_square_partition_only = !frame_is_intra_only(cm);
@@ -923,6 +927,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
    sf->use_uv_intra_rd_estimate = 0;
    sf->use_fast_lpf_pick = 0;
    sf->use_fast_coef_updates = 0;
+  sf->use_fast_coef_costing = 0;
    sf->mode_skip_start = MAX_MODES;  // Mode index at which mode skip mask set
    sf->use_nonrd_pick_mode = 0;
    sf->encode_breakout_thresh = 0;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h

index 019cb138802f6e48c3cd7ce61919b2cf6180493e..f1e5e3a68ad32795eb97155b42af2edc600f802b 100644 (file)
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -420,6 +420,10 @@ typedef struct {
    // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
    // modes are disabled in order from LSB to MSB for each BLOCK_SIZE.
    int disable_inter_mode_mask[BLOCK_SIZES];
+
+  // This feature controls whether we do the expensive context update and
+  // calculation in the rd coefficient costing loop.
+  int use_fast_coef_costing;
  } SPEED_FEATURES;
  
  typedef struct {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c

index 76683dae40a3098935eabff0262c69fbb1d6afe1..3b2d78889d6f4dad34be15eb0d079d5814d9ca63 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -72,6 +72,7 @@ struct rdcost_block_args {
    int64_t this_rd;
    int64_t best_rd;
    int skip;
+  int use_fast_coef_costing;
    const scan_order *so;
  };
  
@@ -549,12 +550,12 @@ static const int16_t band_counts[TX_SIZES][8] = {
    { 1, 2, 3, 4, 11,  256 - 21, 0 },
    { 1, 2, 3, 4, 11, 1024 - 21, 0 },
  };
-
  static INLINE int cost_coeffs(MACROBLOCK *x,
                                int plane, int block,
                                ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                                TX_SIZE tx_size,
-                              const int16_t *scan, const int16_t *nb) {
+                              const int16_t *scan, const int16_t *nb,
+                              int use_fast_coef_costing) {
    MACROBLOCKD *const xd = &x->e_mbd;
    MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
    struct macroblock_plane *p = &x->plane[plane];
@@ -568,7 +569,6 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
    uint8_t *p_tok = x->token_cache;
    int pt = combine_entropy_contexts(*A, *L);
    int c, cost;
-
    // Check for consistency of tx_size with mode info
    assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                                : get_uv_tx_size(mbmi) == tx_size);
@@ -594,9 +594,13 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
  
        v = qcoeff[rc];
        t = vp9_dct_value_tokens_ptr[v].token;
-      pt = get_coef_context(nb, p_tok, c);
-      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
-      p_tok[rc] = vp9_pt_energy_class[t];
+      if (use_fast_coef_costing) {
+        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
+      } else {
+        pt = get_coef_context(nb, p_tok, c);
+        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
+        p_tok[rc] = vp9_pt_energy_class[t];
+      }
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
@@ -606,8 +610,12 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
  
      // eob token
      if (band_left) {
-      pt = get_coef_context(nb, p_tok, c);
-      cost += (*token_costs)[0][pt][EOB_TOKEN];
+      if (use_fast_coef_costing) {
+        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
+      } else {
+        pt = get_coef_context(nb, p_tok, c);
+        cost += (*token_costs)[0][pt][EOB_TOKEN];
+      }
      }
    }
  
@@ -616,7 +624,6 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
  
    return cost;
  }
-
  static void dist_block(int plane, int block, TX_SIZE tx_size,
                         struct rdcost_block_args* args) {
    const int ss_txfrm_size = tx_size << 1;
@@ -648,7 +655,8 @@ static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
  
    args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                             args->t_left + y_idx, tx_size,
-                           args->so->scan, args->so->neighbors);
+                           args->so->scan, args->so->neighbors,
+                           args->use_fast_coef_costing);
  }
  
  static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -732,12 +740,14 @@ static void txfm_rd_in_plane(MACROBLOCK *x,
                               int *rate, int64_t *distortion,
                               int *skippable, int64_t *sse,
                               int64_t ref_best_rd, int plane,
-                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
+                             BLOCK_SIZE bsize, TX_SIZE tx_size,
+                             int use_fast_coef_casting) {
    MACROBLOCKD *const xd = &x->e_mbd;
    struct macroblockd_plane *const pd = &xd->plane[plane];
    struct rdcost_block_args args = { 0 };
    args.x = x;
    args.best_rd = ref_best_rd;
+  args.use_fast_coef_costing = use_fast_coef_casting;
  
    if (plane == 0)
      xd->mi_8x8[0]->mbmi.tx_size = tx_size;
@@ -776,7 +786,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
  
    txfm_rd_in_plane(x, rate, distortion, skip,
                     &sse[mbmi->tx_size], ref_best_rd, 0, bs,
-                   mbmi->tx_size);
+                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
    cpi->tx_stepdown_count[0]++;
  }
  
@@ -920,7 +930,8 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
    // Actually encode using the chosen mode if a model was used, but do not
    // update the r, d costs
    txfm_rd_in_plane(x, rate, distortion, skip,
-                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
+                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
+                   cpi->sf.use_fast_coef_costing);
  
    if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
      cpi->tx_stepdown_count[0]++;
@@ -968,7 +979,8 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
      for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
        txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                         &s[tx_size], &sse[tx_size],
-                       ref_best_rd, 0, bs, tx_size);
+                       ref_best_rd, 0, bs, tx_size,
+                       cpi->sf.use_fast_coef_costing);
      choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                               skip, txfm_cache, bs);
    }
@@ -997,7 +1009,8 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
      for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
        txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                         &s[tx_size], &sse[tx_size],
-                       ref_best_rd, 0, bs, tx_size);
+                       ref_best_rd, 0, bs, tx_size,
+                       cpi->sf.use_fast_coef_costing);
      choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                               skip, txfm_cache, bs);
    }
@@ -1100,7 +1113,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
            vp9_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
-                               so->scan, so->neighbors);
+                               so->scan, so->neighbors,
+                               cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next;
            vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
@@ -1112,7 +1126,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
            vp9_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
-                               so->scan, so->neighbors);
+                             so->scan, so->neighbors,
+                             cpi->sf.use_fast_coef_costing);
            distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                          16, &unused) >> 2;
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
@@ -1296,7 +1311,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
    return best_rd;
  }
  
-static void super_block_uvrd(MACROBLOCK *x,
+static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
                               int *rate, int64_t *distortion, int *skippable,
                               int64_t *sse, BLOCK_SIZE bsize,
                               int64_t ref_best_rd) {
@@ -1323,7 +1338,8 @@ static void super_block_uvrd(MACROBLOCK *x,
  
    for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
      txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
-                     ref_best_rd, plane, bsize, uv_txfm_size);
+                     ref_best_rd, plane, bsize, uv_txfm_size,
+                     cpi->sf.use_fast_coef_costing);
      if (pnrate == INT_MAX)
        goto term;
      *rate += pnrate;
@@ -1359,7 +1375,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
  
      xd->mi_8x8[0]->mbmi.uv_mode = mode;
  
-    super_block_uvrd(x, &this_rate_tokenonly,
+    super_block_uvrd(cpi, x, &this_rate_tokenonly,
                       &this_distortion, &s, &this_sse, bsize, best_rd);
      if (this_rate_tokenonly == INT_MAX)
        continue;
@@ -1402,14 +1418,15 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
    return best_rd;
  }
  
-static int64_t rd_sbuv_dcpred(const VP9_COMMON *cm, MACROBLOCK *x,
+static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
                                int *rate, int *rate_tokenonly,
                                int64_t *distortion, int *skippable,
                                BLOCK_SIZE bsize) {
+  const VP9_COMMON *cm = &cpi->common;
    int64_t unused;
  
    x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
-  super_block_uvrd(x, rate_tokenonly, distortion,
+  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                     skippable, &unused, bsize, INT64_MAX);
    *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
    return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
@@ -1425,7 +1442,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
    // Use an estimated rd for uv_intra based on DC_PRED if the
    // appropriate speed flag is set.
    if (cpi->sf.use_uv_intra_rd_estimate) {
-    rd_sbuv_dcpred(&cpi->common, x, rate_uv, rate_uv_tokenonly, dist_uv,
+    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
                     skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
    // Else do a proper rd search for each possible transform size that may
    // be considered in the main rd loop.
@@ -1588,7 +1605,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                          16, &ssz);
        thissse += ssz;
        thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
-                              so->scan, so->neighbors);
+                              so->scan, so->neighbors,
+                              cpi->sf.use_fast_coef_costing);
        rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
        rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
        rd = MIN(rd1, rd2);
@@ -3026,7 +3044,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
      rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
      rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
  
-    super_block_uvrd(x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
+    super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
                       bsize, ref_best_rd - rdcosty);
      if (*rate_uv == INT_MAX) {
        *rate2 = INT_MAX;
@@ -4145,7 +4163,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
          // then dont bother looking at UV
          vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
                                          BLOCK_8X8);
-        super_block_uvrd(x, &rate_uv, &distortion_uv, &uv_skippable,
+        super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
                           &uv_sse, BLOCK_8X8, tmp_best_rdu);
          if (rate_uv == INT_MAX)
            continue;
author	Jim Bankoski <jimbankoski@google.com>
	Mon, 3 Mar 2014 19:49:13 +0000 (11:49 -0800)
committer	Jim Bankoski <jimbankoski@google.com>
	Mon, 3 Mar 2014 19:49:13 +0000 (11:49 -0800)
vp9/encoder/vp9_onyx_if.c		patch \| blob \| history
vp9/encoder/vp9_onyx_int.h		patch \| blob \| history
vp9/encoder/vp9_rdopt.c		patch \| blob \| history