From 960582af7607afb695e99f61c0e7f6e806a9f0b3 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Mon, 18 Jun 2018 16:07:41 -0700 Subject: [PATCH] Add a partition search breakout model for q-index between 150 and 200. Previously the ML based breakout feature was only supported for q-index larger than 200. This only affects speed 1 and 2, resolution under 720p, q-index between 150 and 200, low bit-depth. Compression performance change is neutral. Encoding speed gain is up to 30% for speed 1; up to 20% for speed 2. Results from encoding city_4cif_30fps: speed 1, QP=38 before: 37.689 dB, 41007b/f, 2.91 fps after: 37.687 dB, 40998b/f, 3.46 fps speed 1, QP=48 before: 35.959 dB, 22106b/f, 3.66 fps after: 35.950 dB, 22118b/f, 4.83 fps speed 2, QP=38 before: 37.630 dB, 40999b/f, 4.42 fps after: 37.633 dB, 41063b/f, 4.63 fps speed 2, QP=48 before: 35.905 dB, 22177b/f, 4.90 fps after: 35.889 dB, 22145b/f, 5.92 fps Change-Id: Ibd4a2f4d7093fb248ab94ddd388cbaa8de2c5ef7 --- vp9/encoder/vp9_encodeframe.c | 94 ++++++++++++++++++++++++++------ vp9/encoder/vp9_speed_features.c | 4 ++ vp9/encoder/vp9_speed_features.h | 1 + 3 files changed, 83 insertions(+), 16 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 774d15251..b84e09c59 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3317,20 +3317,73 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, } #define FEATURES 4 -static const float partition_breakout_weights_64[FEATURES + 1] = { - -0.016673f, -0.001025f, -0.000032f, 0.000833f, 1.94261885f - 2.1f, +#define Q_CTX 2 +static const float partition_breakout_weights_64[Q_CTX][FEATURES + 1] = { + { + -0.016673f, + -0.001025f, + -0.000032f, + 0.000833f, + 1.94261885f - 2.1f, + }, + { + -0.160867f, + -0.002101f, + 0.000011f, + 0.002448f, + 1.65738142f - 2.5f, + }, }; -static const float partition_breakout_weights_32[FEATURES + 1] = { - -0.010554f, -0.003081f, -0.000134f, 0.004491f, 1.68445992f - 
3.5f, +static const float partition_breakout_weights_32[Q_CTX][FEATURES + 1] = { + { + -0.010554f, + -0.003081f, + -0.000134f, + 0.004491f, + 1.68445992f - 3.5f, + }, + { + -0.051489f, + -0.007609f, + 0.000016f, + 0.009792f, + 1.28089404f - 2.5f, + }, }; -static const float partition_breakout_weights_16[FEATURES + 1] = { - -0.013154f, -0.002404f, -0.000977f, 0.008450f, 2.57404566f - 5.5f, +static const float partition_breakout_weights_16[Q_CTX][FEATURES + 1] = { + { + -0.013154f, + -0.002404f, + -0.000977f, + 0.008450f, + 2.57404566f - 5.5f, + }, + { + -0.019146f, + -0.004018f, + 0.000064f, + 0.008187f, + 2.15043926f - 2.5f, + }, }; -static const float partition_breakout_weights_8[FEATURES + 1] = { - -0.011807f, -0.009873f, -0.000931f, 0.034768f, 1.32254851f - 2.0f, +static const float partition_breakout_weights_8[Q_CTX][FEATURES + 1] = { + { + -0.011807f, + -0.009873f, + -0.000931f, + 0.034768f, + 1.32254851f - 2.0f, + }, + { + -0.003861f, + -0.002701f, + 0.000100f, + 0.013876f, + 1.96755111f - 1.5f, + }, }; // ML-based partition search breakout. @@ -3338,22 +3391,30 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize, const MACROBLOCK *const x, const RD_COST *const rd_cost) { DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; + const VP9_COMMON *const cm = &cpi->common; float features[FEATURES]; const float *linear_weights = NULL; // Linear model weights. float linear_score = 0.0f; + const int qindex = cm->base_qindex; + const int q_ctx = qindex >= 200 ? 
0 : 1; switch (bsize) { - case BLOCK_64X64: linear_weights = partition_breakout_weights_64; break; - case BLOCK_32X32: linear_weights = partition_breakout_weights_32; break; - case BLOCK_16X16: linear_weights = partition_breakout_weights_16; break; - case BLOCK_8X8: linear_weights = partition_breakout_weights_8; break; + case BLOCK_64X64: + linear_weights = partition_breakout_weights_64[q_ctx]; + break; + case BLOCK_32X32: + linear_weights = partition_breakout_weights_32[q_ctx]; + break; + case BLOCK_16X16: + linear_weights = partition_breakout_weights_16[q_ctx]; + break; + case BLOCK_8X8: linear_weights = partition_breakout_weights_8[q_ctx]; break; default: assert(0 && "Unexpected block size."); return 0; } if (!linear_weights) return 0; { // Generate feature values. - const VP9_COMMON *const cm = &cpi->common; - const int ac_q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth); + const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth); const int num_pels_log2 = num_pels_log2_lookup[bsize]; int feature_index = 0; unsigned int var, sse; @@ -3385,9 +3446,10 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize, linear_score += linear_weights[i] * features[i]; } - return linear_score >= 0; + return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx]; } #undef FEATURES +#undef Q_CTX // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are // unlikely to be selected depending on previous rate-distortion optimization @@ -3577,7 +3639,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (!x->e_mbd.lossless && ctx->skippable) { int use_ml_based_breakout = cpi->sf.use_ml_partition_search_breakout && - cm->base_qindex >= 200; + cm->base_qindex >= 150; #if CONFIG_VP9_HIGHBITDEPTH if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) use_ml_based_breakout = 0; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 254c4e2b1..7a02623dc 100644 --- 
a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -83,6 +83,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; sf->partition_search_breakout_thr.dist = (1 << 21); sf->use_ml_partition_search_breakout = 1; + sf->ml_partition_search_breakout_thresh[0] = 0.0f; + sf->ml_partition_search_breakout_thresh[1] = 0.0f; } } @@ -97,6 +99,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; sf->partition_search_breakout_thr.dist = (1 << 22); sf->partition_search_breakout_thr.rate = 100; + sf->ml_partition_search_breakout_thresh[0] = 0.0f; + sf->ml_partition_search_breakout_thresh[1] = -1.0f; } sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index eede9cbe2..7a9b3a622 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -472,6 +472,7 @@ typedef struct SPEED_FEATURES { // Use ML-based partition search early breakout. int use_ml_partition_search_breakout; + float ml_partition_search_breakout_thresh[2]; // Machine-learning based partition search early termination int ml_partition_search_early_termination; -- 2.40.0