From 6e4dff92a4bf471c82fdb8527bff92fb8778dd65 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Mon, 25 Jan 2016 12:14:18 -0800 Subject: [PATCH] Implement a tile copying method in large-scale tile coding A tile copy mode is introduced, which allows a tile to use another tile's coded data directly at bitstream level. This largely reduces the bit rate in this use case. Our tests showed that 10% - 20% bit rate reduction was achieved. Change-Id: Icf5ae00320e27193b15ce95297720f8b6f5e7fd9 --- vp9/decoder/vp9_decodeframe.c | 68 +++++++++++++++----- vp9/encoder/vp9_bitstream.c | 115 +++++++++++++++++++++++++++------- vp9/encoder/vp9_bitstream.h | 18 ++++-- vp9/encoder/vp9_encoder.h | 8 +++ 4 files changed, 165 insertions(+), 44 deletions(-) diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index df35ae732..45800ad4f 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -2558,7 +2558,8 @@ static void get_tile_buffer(const uint8_t *const data_end, struct vpx_internal_error_info *error_info, const uint8_t **data, vpx_decrypt_cb decrypt_cb, void *decrypt_state, - TileBuffer *buf, VP9_COMMON *const cm) { + TileBuffer (*tile_buffers)[1024], + int tile_size_bytes, int col, int row) { #else static void get_tile_buffer(const uint8_t *const data_end, int is_last, @@ -2571,26 +2572,39 @@ static void get_tile_buffer(const uint8_t *const data_end, size_t size; #if CONFIG_ROW_TILE + size_t copy_size = 0; + const uint8_t *copy_data = NULL; + // mem read function MemRead read_tile_size; - setup_size_read(cm->tile_size_bytes, &read_tile_size); + setup_size_read(tile_size_bytes, &read_tile_size); #endif if (!is_last) { #if CONFIG_ROW_TILE - if (!read_is_valid(*data, cm->tile_size_bytes, data_end)) + if (!read_is_valid(*data, tile_size_bytes, data_end)) vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt tile length"); if (decrypt_cb) { uint8_t be_data[4]; - decrypt_cb(decrypt_state, *data, be_data, 4); + 
decrypt_cb(decrypt_state, *data, be_data, tile_size_bytes); // Only read number of bytes in cm->tile_size_bytes. size = read_tile_size(be_data); } else { size = read_tile_size(*data); } - *data += cm->tile_size_bytes; + + if ((size >> (tile_size_bytes * 8 - 1)) == 1) { + int offset = (size >> (tile_size_bytes - 1) * 8) & 0x7f; + + // Currently, only use tiles in same column as reference tiles. + copy_data = tile_buffers[row - offset][col].data; + copy_size = tile_buffers[row - offset][col].size; + size = 0; + } + + *data += tile_size_bytes; #else if (!read_is_valid(*data, 4, data_end)) vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, @@ -2613,8 +2627,18 @@ static void get_tile_buffer(const uint8_t *const data_end, size = data_end - *data; } +#if CONFIG_ROW_TILE + if (size > 0) { + tile_buffers[row][col].data = *data; + tile_buffers[row][col].size = size; + } else { + tile_buffers[row][col].data = copy_data; + tile_buffers[row][col].size = copy_size; + } +#else buf->data = *data; buf->size = size; +#endif *data += size; } @@ -2633,15 +2657,17 @@ static void get_tile_buffers(VP9Decoder *pbi, MIN(pbi->dec_tile_col, tile_cols - 1); int tile_row_limit = (pbi->dec_tile_row == -1) ? 
INT_MAX : MIN(pbi->dec_tile_row, tile_rows - 1); + int tile_col_size_bytes = cm->tile_col_size_bytes; + int tile_size_bytes = cm->tile_size_bytes; // tile col size read function MemRead read_tile_col_size; - setup_size_read(cm->tile_col_size_bytes, &read_tile_col_size); + setup_size_read(tile_col_size_bytes, &read_tile_col_size); for (c = 0; c < tile_cols && c <= tile_col_limit; ++c) { if (c < tile_cols - 1) { tile_col_size = read_tile_col_size(data); - data += cm->tile_col_size_bytes; + data += tile_col_size_bytes; tile_end_col[c] = data + tile_col_size; } else { tile_col_size = data_end - data; @@ -2657,15 +2683,18 @@ static void get_tile_buffers(VP9Decoder *pbi, if (tile_col_limit > 0) data = tile_end_col[tile_col_limit - 1]; if (tile_col_limit < tile_cols - 1) - data += cm->tile_col_size_bytes; + data += tile_col_size_bytes; for (r = 0; r <= tile_row_limit; ++r) { - const int is_last = (r == tile_rows - 1); - TileBuffer *const buf = &tile_buffers[r][tile_col_limit]; - buf->col = tile_col_limit; + // The last tile in the row also has a tile header. So here always set + // is_last = 0. + const int is_last = 0; + + tile_buffers[r][tile_col_limit].col = tile_col_limit; get_tile_buffer(tile_end_col[tile_col_limit], is_last, &pbi->common.error, &data, - pbi->decrypt_cb, pbi->decrypt_state, buf, cm); + pbi->decrypt_cb, pbi->decrypt_state, + tile_buffers, tile_size_bytes, tile_col_limit, r); } return; } @@ -2675,15 +2704,18 @@ static void get_tile_buffers(VP9Decoder *pbi, data = tile_end_col[c - 1]; if (c < tile_cols - 1) - data += cm->tile_col_size_bytes; + data += tile_col_size_bytes; for (r = 0; r < tile_rows && r <= tile_row_limit; ++r) { - const int is_last = (r == tile_rows - 1); - TileBuffer *const buf = &tile_buffers[r][c]; - buf->col = c; + // The last tile in the row also has a tile header. So here always set + // is_last = 0. 
+ const int is_last = 0; + + tile_buffers[r][c].col = c; get_tile_buffer(tile_end_col[c], is_last, &pbi->common.error, &data, - pbi->decrypt_cb, pbi->decrypt_state, buf, cm); + pbi->decrypt_cb, pbi->decrypt_state, + tile_buffers, tile_size_bytes, c, r); } } } @@ -2767,6 +2799,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * aligned_cols); + // Scan the frame data buffer, and get each tile data location as well as its + // size. get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); if (pbi->tile_data == NULL || diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 1e107accf..7e508203a 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -2170,18 +2170,18 @@ static INLINE void set_tile_size_in_bytes(VP9_COMP *cpi, // bytes needed. Note: This decision is not always guaranteed to be true. // In later bitstream packing, need to check if the actual tile size is // out of the range. 
- if (cpi->max_tile_size < ONE_BYTE_THRESH) + if (cpi->max_tile_size < ONE_BYTE_THRESH(0)) cm->tile_size_bytes = 1; - else if (cpi->max_tile_size < TWO_BYTE_THRESH) + else if (cpi->max_tile_size < TWO_BYTE_THRESH(0)) cm->tile_size_bytes = 2; - else if (cpi->max_tile_size < THREE_BYTE_THRESH) + else if (cpi->max_tile_size < THREE_BYTE_THRESH(0)) cm->tile_size_bytes = 3; - if (cpi->max_tile_col_size < ONE_BYTE_THRESH) + if (cpi->max_tile_col_size < ONE_BYTE_THRESH(1)) cm->tile_col_size_bytes = 1; - else if (cpi->max_tile_col_size < TWO_BYTE_THRESH) + else if (cpi->max_tile_col_size < TWO_BYTE_THRESH(1)) cm->tile_col_size_bytes = 2; - else if (cpi->max_tile_col_size < THREE_BYTE_THRESH) + else if (cpi->max_tile_col_size < THREE_BYTE_THRESH(1)) cm->tile_col_size_bytes = 3; } } @@ -2265,18 +2265,18 @@ static int get_refresh_mask(VP9_COMP *cpi) { #if CONFIG_ROW_TILE static INLINE void setup_size_storing(int num_bytes, MemPut *output, - unsigned int *size_limit) { + unsigned int *size_limit, int type) { *output = mem_put_be32; *size_limit = UINT_MAX; if (num_bytes == 3) { *output = mem_put_be24; - *size_limit = THREE_BYTE_LIMIT; + *size_limit = THREE_BYTE_LIMIT(type); } else if (num_bytes == 2) { *output = mem_put_be16; - *size_limit = TWO_BYTE_LIMIT; + *size_limit = TWO_BYTE_LIMIT(type); } else if (num_bytes == 1) { *output = mem_put_be8; - *size_limit = ONE_BYTE_LIMIT; + *size_limit = ONE_BYTE_LIMIT(type); } } #endif @@ -2295,6 +2295,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { #if CONFIG_ROW_TILE TOKENEXTRA *(*tok)[1024] = cpi->tile_tok; TileInfo (*tile)[1024] = cpi->tile_info; + EncTileBuffer (*tile_buf)[1024] = cpi->tile_buffers; #else TOKENEXTRA *tok[4][1 << 6]; TileInfo tile[4][1 << 6]; @@ -2312,9 +2313,10 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { unsigned int tile_size_limit; unsigned int tile_col_size_limit; - setup_size_storing(cm->tile_size_bytes, &output_tile_size, &tile_size_limit); + 
setup_size_storing(cm->tile_size_bytes, &output_tile_size, &tile_size_limit, + 0); setup_size_storing(cm->tile_col_size_bytes, &output_tile_col_size, - &tile_col_size_limit); + &tile_col_size_limit, 1); #endif vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * @@ -2339,14 +2341,19 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { total_size += cm->tile_col_size_bytes; for (tile_row = 0; tile_row < tile_rows; tile_row++) { - const TileInfo * const ptile = &tile[tile_row][tile_col]; + TileInfo * const ptile = &tile[tile_row][tile_col]; + EncTileBuffer *const ptile_buf = &tile_buf[tile_row][tile_col]; + uint8_t *source; + int write_tile_data = 1; + tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col]; - if (tile_row < tile_rows - 1) - vp9_start_encode(&residual_bc, data_ptr + total_size - + cm->tile_size_bytes); - else - vp9_start_encode(&residual_bc, data_ptr + total_size); + // Is CONFIG_ROW_TILE = 1, every tile in the row has a header even for + // the last one. + ptile_buf->data_start = data_ptr + total_size; + source = data_ptr + total_size + cm->tile_size_bytes; + + vp9_start_encode(&residual_bc, source); write_modes(cpi, ptile, &residual_bc, &tok[tile_row][tile_col], tok_end); assert(tok[tile_row][tile_col] == tok_end); @@ -2362,12 +2369,78 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { return 0; } - if (tile_row < tile_rows - 1) { + ptile_buf->data_size = residual_bc.pos; + + // Check if this tile is a copy tile. + // Very low chances to have copy tiles on the key frame. Thus, don't + // search on key frame to reduce unnecessary search. + if (cm->frame_type != KEY_FRAME && final_packing) { + const MV32 candidates[1] = {{1, 0}}; + int i; + + assert(cm->tile_size_bytes >= 1); + + // (TODO: yunqingwang) For now, only above tile is checked and used. + // More candidates such as left tile can be added later. 
+ for (i = 0; i < 1; i++) { + int cand_row = tile_row - candidates[0].row; + int cand_col = tile_col - candidates[0].col; + uint8_t tile_hdr; + uint8_t *ref_tile; + unsigned int ref_tile_size; + int identical_tile_offset = 0; + + if (tile_row == 0 ) + continue; + + tile_hdr = *(tile_buf[cand_row][cand_col].data_start); + + // Read out tcm bit + if ((tile_hdr >> 7) == 1) { + // The candidate is a copy tile itself + tile_hdr &= 0x7f; + identical_tile_offset = tile_hdr + 1; + ref_tile = tile_buf[cand_row - tile_hdr][cand_col].data_start + + cm->tile_size_bytes; + ref_tile_size = tile_buf[cand_row - tile_hdr][cand_col].data_size; + } else { + identical_tile_offset = 1; + ref_tile = tile_buf[cand_row][cand_col].data_start + + cm->tile_size_bytes; + ref_tile_size = tile_buf[cand_row][cand_col].data_size; + } + + if (identical_tile_offset < 128 && ref_tile_size == residual_bc.pos) { + unsigned int m; + uint8_t *cur_tile = tile_buf[tile_row][tile_col].data_start + + cm->tile_size_bytes; + int match = 1; + + for (m = 0; m < residual_bc.pos; m++) { + if (*ref_tile++ != *cur_tile++) { + match = 0; + break; + } + } + + if (match) { + write_tile_data = 0; + identical_tile_offset |= 0x80; + identical_tile_offset <<= (cm->tile_size_bytes - 1) * 8; + output_tile_size(data_ptr + total_size, identical_tile_offset); + break; + } + } + } + } + + if (write_tile_data) { // size of this tile output_tile_size(data_ptr + total_size, residual_bc.pos); - total_size += cm->tile_size_bytes; + total_size += residual_bc.pos; } - total_size += residual_bc.pos; + + total_size += cm->tile_size_bytes; } if (!is_last_col) { diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h index 676168a07..7422e0b05 100644 --- a/vp9/encoder/vp9_bitstream.h +++ b/vp9/encoder/vp9_bitstream.h @@ -22,12 +22,18 @@ extern "C" { void vp9_entropy_mode_init(); #if CONFIG_ROW_TILE -#define ONE_BYTE_LIMIT 255 -#define TWO_BYTE_LIMIT 65535 -#define THREE_BYTE_LIMIT 16777215 -#define ONE_BYTE_THRESH 
(ONE_BYTE_LIMIT - (ONE_BYTE_LIMIT >> 2)) -#define TWO_BYTE_THRESH (TWO_BYTE_LIMIT - (TWO_BYTE_LIMIT >> 2)) -#define THREE_BYTE_THRESH (THREE_BYTE_LIMIT - (THREE_BYTE_LIMIT >> 2)) +// In the tile header, 1 bit is used for TCM, and the rest bits are used for +// TDS. But, no TCM bit in tile column header. +// type 0: tile header; type 1: tile column header. +#define ONE_BYTE_LIMIT(type) ((type) ? 255 : 127) +#define TWO_BYTE_LIMIT(type) ((type) ? 65535 : 32767) +#define THREE_BYTE_LIMIT(type) ((type) ? 16777215 : 8388607) +#define ONE_BYTE_THRESH(type) \ + (ONE_BYTE_LIMIT(type) - (ONE_BYTE_LIMIT(type) >> 2)) +#define TWO_BYTE_THRESH(type) \ + (TWO_BYTE_LIMIT(type) - (TWO_BYTE_LIMIT(type) >> 2)) +#define THREE_BYTE_THRESH(type) \ + (THREE_BYTE_LIMIT(type) - (THREE_BYTE_LIMIT(type) >> 2)) typedef void (*MemPut)(void *, MEM_VALUE_T); int vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size, diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 7142f43e7..4d1d837fc 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -217,6 +217,13 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; } +#if CONFIG_ROW_TILE +typedef struct EncTileBuffer { + uint8_t *data_start; + unsigned int data_size; +} EncTileBuffer; +#endif + typedef struct VP9_COMP { QUANTS quants; MACROBLOCK mb; @@ -441,6 +448,7 @@ typedef struct VP9_COMP { #if CONFIG_ROW_TILE TileInfo tile_info[1024][1024]; + EncTileBuffer tile_buffers[1024][1024]; TOKENEXTRA *tile_tok[1024][1024]; unsigned int max_tile_size; -- 2.40.0