From a220b931f5b5fccf4ecc758649806dc48bea1b7c Mon Sep 17 00:00:00 2001 From: Jerome Jiang Date: Tue, 27 Jun 2017 13:17:59 -0700 Subject: [PATCH] vp9: compute skinmap only once before encoding. This could save some cycles since skin detection is used in multiple places in vp9. 1~2% speed up on ARM. Change-Id: I86b731945f85215bbb0976021cd0f2040ff2687c --- vp9/encoder/vp9_encodeframe.c | 11 +--- vp9/encoder/vp9_encoder.c | 9 ++- vp9/encoder/vp9_encoder.h | 2 + vp9/encoder/vp9_noise_estimate.c | 13 +---- vp9/encoder/vp9_skin_detection.c | 96 +++++++++++++++++++------------- vp9/encoder/vp9_skin_detection.h | 4 +- 6 files changed, 74 insertions(+), 61 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 2b694a389..8a856d889 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -743,16 +743,7 @@ static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res, for (i = 0; i < ymis; i += 2) { for (j = 0; j < xmis; j += 2) { int bl_index = block_index + i * cm->mi_cols + j; - int bl_index1 = bl_index + 1; - int bl_index2 = bl_index + cm->mi_cols; - int bl_index3 = bl_index2 + 1; - int consec_zeromv = - VPXMIN(cpi->consec_zero_mv[bl_index], - VPXMIN(cpi->consec_zero_mv[bl_index1], - VPXMIN(cpi->consec_zero_mv[bl_index2], - cpi->consec_zero_mv[bl_index3]))); - int is_skin = vp9_compute_skin_block( - ysignal, usignal, vsignal, sp, spuv, BLOCK_16X16, consec_zeromv, 0); + int is_skin = cpi->skin_map[bl_index]; num_16x16_skin += is_skin; num_16x16_nonskin += (1 - is_skin); if (num_16x16_nonskin > 3) { diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index bf33168e7..728fb8eca 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -779,6 +779,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->nmvsadcosts_hp[0] = NULL; cpi->nmvsadcosts_hp[1] = NULL; + vpx_free(cpi->skin_map); + cpi->skin_map = NULL; + vpx_free(cpi->prev_partition); cpi->prev_partition = NULL; @@ -2046,6 +2049,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, realloc_segmentation_maps(cpi); + CHECK_MEM_ERROR(cm, cpi->skin_map, vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(cpi->skin_map[0]))); + CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create()); CHECK_MEM_ERROR( @@ -3550,6 +3556,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, cpi->oxcf.content != VP9E_CONTENT_SCREEN && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { cpi->use_skin_detection = 1; + vp9_compute_skin_map(cpi, BLOCK_16X16); } vp9_set_quantizer(cm, q); @@ -4424,7 +4431,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, #endif #ifdef OUTPUT_YUV_SKINMAP if (cpi->common.current_video_frame > 1) { - vp9_compute_skin_map(cpi, yuv_skinmap_file); + vp9_output_skin_map(cpi, yuv_skinmap_file); } #endif diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 7ab892000..f2af52f1b 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -542,6 +542,8 @@ typedef struct VP9_COMP { uint8_t *segmentation_map; + uint8_t *skin_map; + // segment threashold for encode breakout int segment_encode_breakout[MAX_SEGMENTS]; diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c index e2239b44b..57f4fc078 100644 --- a/vp9/encoder/vp9_noise_estimate.c +++ b/vp9/encoder/vp9_noise_estimate.c @@ -190,21 +190,12 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { int bl_index1 = bl_index + 1; int bl_index2 = bl_index + cm->mi_cols; int bl_index3 = bl_index2 + 1; + int is_skin = 0; + if (cpi->use_skin_detection) is_skin = cpi->skin_map[bl_index]; // Only consider blocks that are likely steady background. i.e, have // been encoded as zero/low motion x (= thresh_consec_zeromv) frames // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all // 4 sub-blocks for 16x16 block. Also, avoid skin blocks. - int consec_zeromv = - VPXMIN(cpi->consec_zero_mv[bl_index], - VPXMIN(cpi->consec_zero_mv[bl_index1], - VPXMIN(cpi->consec_zero_mv[bl_index2], - cpi->consec_zero_mv[bl_index3]))); - int is_skin = 0; - if (cpi->use_skin_detection) { - is_skin = - vp9_compute_skin_block(src_y, src_u, src_v, src_ystride, - src_uvstride, bsize, consec_zeromv, 0); - } if (frame_low_motion && cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv && cpi->consec_zero_mv[bl_index1] > thresh_consec_zeromv && diff --git a/vp9/encoder/vp9_skin_detection.c b/vp9/encoder/vp9_skin_detection.c index 175503ed4..cf6b364fe 100644 --- a/vp9/encoder/vp9_skin_detection.c +++ b/vp9/encoder/vp9_skin_detection.c @@ -36,44 +36,31 @@ int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, } } -#ifdef OUTPUT_YUV_SKINMAP -// For viewing skin map on input source. -void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { - int i, j, mi_row, mi_col, num_bl; +void vp9_compute_skin_map(VP9_COMP *const cpi, BLOCK_SIZE bsize) { + int mi_row, mi_col, num_bl; VP9_COMMON *const cm = &cpi->common; - uint8_t *y; const uint8_t *src_y = cpi->Source->y_buffer; const uint8_t *src_u = cpi->Source->u_buffer; const uint8_t *src_v = cpi->Source->v_buffer; const int src_ystride = cpi->Source->y_stride; const int src_uvstride = cpi->Source->uv_stride; - int y_bsize = 16; // Use 8x8 or 16x16. - int uv_bsize = y_bsize >> 1; - int ypos = y_bsize >> 1; - int uvpos = uv_bsize >> 1; - int shy = (y_bsize == 8) ? 3 : 4; - int shuv = shy - 1; - int fac = y_bsize / 8; + const int y_bsize = 4 << b_width_log2_lookup[bsize]; + const int uv_bsize = y_bsize >> 1; + const int shy = (y_bsize == 8) ? 3 : 4; + const int shuv = shy - 1; + const int fac = y_bsize / 8; // Use center pixel or average of center 2x2 pixels. - int mode_filter = 0; - YV12_BUFFER_CONFIG skinmap; - memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG)); - if (vpx_alloc_frame_buffer(&skinmap, cm->width, cm->height, cm->subsampling_x, - cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS, - cm->byte_alignment)) { - vpx_free_frame_buffer(&skinmap); - return; - } - memset(skinmap.buffer_alloc, 128, skinmap.frame_size); - y = skinmap.y_buffer; + const int mode_filter = 0; + // Loop through blocks and set skin map based on center pixel of block. - // Set y to white for skin block, otherwise set to source with gray scale. // Ignore rightmost/bottom boundary blocks. for (mi_row = 0; mi_row < cm->mi_rows - 1; mi_row += fac) { num_bl = 0; for (mi_col = 0; mi_col < cm->mi_cols - 1; mi_col += fac) { int is_skin = 0; if (mode_filter == 1) { + const int ypos = y_bsize >> 1; + const int uvpos = uv_bsize >> 1; // Use 2x2 average at center. uint8_t ysource = src_y[ypos * src_ystride + ypos]; uint8_t usource = src_u[uvpos * src_uvstride + uvpos]; @@ -92,13 +79,12 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { vsource = (vsource + vsource2 + vsource3 + vsource4) >> 2; is_skin = vpx_skin_pixel(ysource, usource, vsource, 1); } else { - int block_size = BLOCK_8X8; int consec_zeromv = 0; int bl_index = mi_row * cm->mi_cols + mi_col; int bl_index1 = bl_index + 1; int bl_index2 = bl_index + cm->mi_cols; int bl_index3 = bl_index2 + 1; - if (y_bsize == 8) + if (bsize == BLOCK_8X8) consec_zeromv = cpi->consec_zero_mv[bl_index]; else consec_zeromv = @@ -106,29 +92,63 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { VPXMIN(cpi->consec_zero_mv[bl_index1], VPXMIN(cpi->consec_zero_mv[bl_index2], cpi->consec_zero_mv[bl_index3]))); - if (y_bsize == 16) block_size = BLOCK_16X16; - is_skin = - vp9_compute_skin_block(src_y, src_u, src_v, src_ystride, - src_uvstride, block_size, consec_zeromv, 0); + is_skin = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride, + src_uvstride, bsize, consec_zeromv, 0); } + cpi->skin_map[mi_row * cm->mi_cols + mi_col] = is_skin; + num_bl++; + src_y += y_bsize; + src_u += uv_bsize; + src_v += uv_bsize; + } + src_y += (src_ystride << shy) - (num_bl << shy); + src_u += (src_uvstride << shuv) - (num_bl << shuv); + src_v += (src_uvstride << shuv) - (num_bl << shuv); + } +} + +#ifdef OUTPUT_YUV_SKINMAP +// For viewing skin map on input source. +void vp9_output_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { + int i, j, mi_row, mi_col, num_bl; + VP9_COMMON *const cm = &cpi->common; + uint8_t *y; + const uint8_t *src_y = cpi->Source->y_buffer; + const int src_ystride = cpi->Source->y_stride; + + const int y_bsize = 16; // Use 8x8 or 16x16. + const int shy = (y_bsize == 8) ? 3 : 4; + const int fac = y_bsize / 8; + + YV12_BUFFER_CONFIG skinmap; + memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG)); + if (vpx_alloc_frame_buffer(&skinmap, cm->width, cm->height, cm->subsampling_x, + cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS, + cm->byte_alignment)) { + vpx_free_frame_buffer(&skinmap); + return; + } + memset(skinmap.buffer_alloc, 128, skinmap.frame_size); + y = skinmap.y_buffer; + // Loop through blocks and set skin map based on center pixel of block. + // Set y to white for skin block, otherwise set to source with gray scale. + // Ignore rightmost/bottom boundary blocks. + for (mi_row = 0; mi_row < cm->mi_rows - 1; mi_row += fac) { + num_bl = 0; + for (mi_col = 0; mi_col < cm->mi_cols - 1; mi_col += fac) { + const int block_index = mi_row * cm->mi_cols + mi_col; + const int is_skin = cpi->skin_map[block_index]; for (i = 0; i < y_bsize; i++) { for (j = 0; j < y_bsize; j++) { - if (is_skin) - y[i * src_ystride + j] = 255; - else - y[i * src_ystride + j] = src_y[i * src_ystride + j]; + y[i * src_ystride + j] = is_skin ? 255 : src_y[i * src_ystride + j]; } } num_bl++; y += y_bsize; src_y += y_bsize; - src_u += uv_bsize; - src_v += uv_bsize; } y += (src_ystride << shy) - (num_bl << shy); src_y += (src_ystride << shy) - (num_bl << shy); - src_u += (src_uvstride << shuv) - (num_bl << shuv); - src_v += (src_uvstride << shuv) - (num_bl << shuv); } vpx_write_yuv_frame(yuv_skinmap_file, &skinmap); vpx_free_frame_buffer(&skinmap); diff --git a/vp9/encoder/vp9_skin_detection.h b/vp9/encoder/vp9_skin_detection.h index bb7ef87d8..6b5f149c9 100644 --- a/vp9/encoder/vp9_skin_detection.h +++ b/vp9/encoder/vp9_skin_detection.h @@ -25,9 +25,11 @@ int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, int stride, int strideuv, int bsize, int consec_zeromv, int curr_motion_magn); +void vp9_compute_skin_map(struct VP9_COMP *const cpi, BLOCK_SIZE bsize); + #ifdef OUTPUT_YUV_SKINMAP // For viewing skin map on input source. -void vp9_compute_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file); +void vp9_output_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file); #endif #ifdef __cplusplus -- 2.40.0