}
if (cm->above_context_alloc_cols < cm->mi_cols) {
+ // TODO(geza.lore): These are bigger than they need to be.
+ // cm->tile_width would be enough but it complicates indexing a
+ // little elsewhere.
+ const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
int i;
+
for (i = 0 ; i < MAX_MB_PLANE ; i++) {
- vpx_free(cm->above_context[i]);
+ vpx_free(cm->above_context[i]);
cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
- 2 * mi_cols_aligned_to_sb(cm->mi_cols),
- sizeof(*cm->above_context[0]));
+ 2 * aligned_mi_cols, sizeof(*cm->above_context[0]));
if (!cm->above_context[i]) goto fail;
}
vpx_free(cm->above_seg_context);
cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
- mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context));
+ aligned_mi_cols, sizeof(*cm->above_seg_context));
if (!cm->above_seg_context) goto fail;
#if CONFIG_VAR_TX
vpx_free(cm->above_txfm_context);
cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc(
- mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_txfm_context));
+ aligned_mi_cols, sizeof(*cm->above_txfm_context));
if (!cm->above_txfm_context) goto fail;
#endif
- cm->above_context_alloc_cols = cm->mi_cols;
+ cm->above_context_alloc_cols = aligned_mi_cols;
}
return 0;
#define MI_MASK (MI_BLOCK_SIZE - 1)
+#if CONFIG_EXT_TILE
+# define MAX_TILE_ROWS 1024
+# define MAX_TILE_COLS 1024
+#else
+# define MAX_TILE_ROWS 4
+# define MAX_TILE_COLS 64
+#endif // CONFIG_EXT_TILE
+
// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
// 00: Profile 0. 8-bit 4:2:0 only.
// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
static INLINE int is_inside(const TileInfo *const tile,
int mi_col, int mi_row, int mi_rows,
const POSITION *mi_pos) {
+#if CONFIG_EXT_TILE
+ (void) mi_rows;
+ return !(mi_row + mi_pos->row < tile->mi_row_start ||
+ mi_col + mi_pos->col < tile->mi_col_start ||
+ mi_row + mi_pos->row >= tile->mi_row_end ||
+ mi_col + mi_pos->col >= tile->mi_col_end);
+#else
return !(mi_row + mi_pos->row < 0 ||
mi_col + mi_pos->col < tile->mi_col_start ||
mi_row + mi_pos->row >= mi_rows ||
mi_col + mi_pos->col >= tile->mi_col_end);
+#endif // CONFIG_EXT_TILE
}
static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
int error_resilient_mode;
+#if !CONFIG_EXT_TILE
int log2_tile_cols, log2_tile_rows;
- int tile_sz_mag;
+#endif // !CONFIG_EXT_TILE
+ int tile_cols, tile_rows;
+ int tile_width, tile_height;
+
int byte_alignment;
int skip_loop_filter;
static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
const int above_idx = mi_col * 2;
- const int left_idx = (mi_row * 2) & 15;
+ const int left_idx = (mi_row * 2) & 15; // FIXME: Mask should be CU_SIZE*2-1
int i;
for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &xd->plane[i];
xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
// Are edges available for intra prediction?
+#if CONFIG_EXT_TILE
+ xd->up_available = (mi_row > tile->mi_row_start);
+#else
xd->up_available = (mi_row != 0);
+#endif // CONFIG_EXT_TILE
xd->left_available = (mi_col > tile->mi_col_start);
if (xd->up_available) {
xd->above_mi = xd->mi[-xd->mi_stride];
static INLINE void vp10_zero_above_context(VP10_COMMON *const cm,
int mi_col_start, int mi_col_end) {
const int width = mi_col_end - mi_col_start;
- int i;
- for (i = 0 ; i < MAX_MB_PLANE ; i++)
- vp10_zero_array(cm->above_context[i] + 2 * mi_col_start, 2 * width);
+ const int offset_y = 2 * mi_col_start;
+ const int width_y = 2 * width;
+ const int offset_uv = offset_y >> cm->subsampling_x;
+ const int width_uv = width_y >> cm->subsampling_x;
+
+ vp10_zero_array(cm->above_context[0] + offset_y, width_y);
+ vp10_zero_array(cm->above_context[1] + offset_uv, width_uv);
+ vp10_zero_array(cm->above_context[2] + offset_uv, width_uv);
+
vp10_zero_array(cm->above_seg_context + mi_col_start, width);
+
#if CONFIG_VAR_TX
vp10_zero_array(cm->above_txfm_context + mi_col_start, width);
#endif // CONFIG_VAR_TX
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
// Decoder may allocate more threads than number of tiles based on user's
// input.
- const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_cols = cm->tile_cols;
const int num_workers = VPXMIN(nworkers, tile_cols);
int i;
#define MIN_TILE_WIDTH_B64 4
#define MAX_TILE_WIDTH_B64 64
-static int get_tile_offset(int idx, int mis, int log2) {
- const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2;
- const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2;
- return VPXMIN(offset, mis);
-}
-
void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) {
- tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows);
- tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows);
+ tile->mi_row_start = row * cm->tile_height;
+ tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height,
+ cm->mi_rows);
}
void vp10_tile_set_col(TileInfo *tile, const VP10_COMMON *cm, int col) {
- tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols);
- tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols);
+ tile->mi_col_start = col * cm->tile_width;
+ tile->mi_col_end = VPXMIN(tile->mi_col_start + cm->tile_width,
+ cm->mi_cols);
}
void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) {
vp10_tile_set_col(tile, cm, col);
}
+#if !CONFIG_EXT_TILE
+// TODO(geza.lore): CU_SIZE dependent.
static int get_min_log2_tile_cols(const int sb64_cols) {
int min_log2 = 0;
while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols)
}
void vp10_get_tile_n_bits(int mi_cols,
- int *min_log2_tile_cols, int *max_log2_tile_cols) {
+ int *min_log2_tile_cols, int *max_log2_tile_cols) {
const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2;
*min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols);
*max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols);
assert(*min_log2_tile_cols <= *max_log2_tile_cols);
}
+#endif // !CONFIG_EXT_TILE
pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
}
-static void setup_tile_info(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+static void setup_tile_info(VP10Decoder *const pbi,
+ struct vpx_read_bit_buffer *const rb) {
+ VP10_COMMON *const cm = &pbi->common;
+#if CONFIG_EXT_TILE
+ // Read the tile width/height
+ cm->tile_width = vpx_rb_read_literal(rb, 6) + 1; // in [1, 64]
+ cm->tile_height = vpx_rb_read_literal(rb, 6) + 1; // in [1, 64]
+
+ cm->tile_width = cm->tile_width << MI_BLOCK_SIZE_LOG2;
+ cm->tile_height = cm->tile_height << MI_BLOCK_SIZE_LOG2;
+
+ cm->tile_width = VPXMIN(cm->tile_width, cm->mi_cols);
+ cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows);
+
+ // Get the number of tiles
+ cm->tile_cols = 1;
+ while (cm->tile_cols * cm->tile_width < cm->mi_cols)
+ ++cm->tile_cols;
+
+ cm->tile_rows = 1;
+ while (cm->tile_rows * cm->tile_height < cm->mi_rows)
+ ++cm->tile_rows;
+
+ if (cm->tile_cols * cm->tile_rows > 1) {
+ // Read the number of bytes used to store tile size
+ pbi->tile_col_size_bytes = vpx_rb_read_literal(rb, 2) + 1;
+ pbi->tile_size_bytes = vpx_rb_read_literal(rb, 2) + 1;
+ }
+#else
int min_log2_tile_cols, max_log2_tile_cols, max_ones;
vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
if (cm->log2_tile_rows)
cm->log2_tile_rows += vpx_rb_read_bit(rb);
+ cm->tile_cols = 1 << cm->log2_tile_cols;
+ cm->tile_rows = 1 << cm->log2_tile_rows;
+
+ cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols);
+ cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows);
+
+  // round up to an integer multiple of the superblock size (8 MI units)
+ cm->tile_width = mi_cols_aligned_to_sb(cm->tile_width);
+ cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height);
+
// tile size magnitude
- if (cm->log2_tile_rows > 0 || cm->log2_tile_cols > 0) {
- cm->tile_sz_mag = vpx_rb_read_literal(rb, 2);
+ if (cm->tile_rows > 1 || cm->tile_cols > 1) {
+ pbi->tile_size_bytes = vpx_rb_read_literal(rb, 2) + 1;
}
+#endif // CONFIG_EXT_TILE
}
-typedef struct TileBuffer {
- const uint8_t *data;
- size_t size;
- int col; // only used with multi-threaded decoding
-} TileBuffer;
-
-static int mem_get_varsize(const uint8_t *data, const int mag) {
- switch (mag) {
- case 0:
- return data[0];
+static int mem_get_varsize(const uint8_t *src, const int sz) {
+ switch (sz) {
case 1:
- return mem_get_le16(data);
+ return src[0];
case 2:
- return mem_get_le24(data);
+ return mem_get_le16(src);
case 3:
- return mem_get_le32(data);
+ return mem_get_le24(src);
+ case 4:
+ return mem_get_le32(src);
+ default:
+ assert("Invalid size" && 0);
+ return -1;
}
+}
+
+#if CONFIG_EXT_TILE
+// Reads the next tile, returning its size and advancing '*data' past the
+// tile size field and the tile payload (no payload is consumed in copy mode).
+static void get_tile_buffer(const uint8_t *const data_end,
+ struct vpx_internal_error_info *error_info,
+ const uint8_t **data,
+ vpx_decrypt_cb decrypt_cb, void *decrypt_state,
+ TileBufferDec (*const tile_buffers)[MAX_TILE_COLS],
+ int tile_size_bytes, int col, int row) {
+ size_t size;
- assert("Invalid tile size marker value" && 0);
+ size_t copy_size = 0;
+ const uint8_t *copy_data = NULL;
- return -1;
+ if (!read_is_valid(*data, tile_size_bytes, data_end))
+ vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt tile length");
+ if (decrypt_cb) {
+ uint8_t be_data[4];
+ decrypt_cb(decrypt_state, *data, be_data, tile_size_bytes);
+
+    // Only read the number of bytes in pbi->tile_size_bytes.
+ size = mem_get_varsize(be_data, tile_size_bytes);
+ } else {
+ size = mem_get_varsize(*data, tile_size_bytes);
+ }
+
+ // The top bit indicates copy mode
+ if ((size >> (tile_size_bytes * 8 - 1)) == 1) {
+ // The remaining bits in the top byte signal the row offset
+ int offset = (size >> (tile_size_bytes - 1) * 8) & 0x7f;
+
+ // Currently, only use tiles in same column as reference tiles.
+ copy_data = tile_buffers[row - offset][col].data;
+ copy_size = tile_buffers[row - offset][col].size;
+ size = 0;
+ }
+
+ *data += tile_size_bytes;
+
+ if (size > (size_t)(data_end - *data))
+ vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt tile size");
+
+ if (size > 0) {
+ tile_buffers[row][col].data = *data;
+ tile_buffers[row][col].size = size;
+ } else {
+ tile_buffers[row][col].data = copy_data;
+ tile_buffers[row][col].size = copy_size;
+ }
+
+ *data += size;
}
+static void get_tile_buffers(
+ VP10Decoder *pbi,
+ const uint8_t *data, const uint8_t *data_end,
+ TileBufferDec (*const tile_buffers)[MAX_TILE_COLS]) {
+ VP10_COMMON *const cm = &pbi->common;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+ const int have_tiles = tile_cols * tile_rows > 1;
+
+ if (!have_tiles) {
+ const uint32_t tile_size = data_end - data;
+ tile_buffers[0][0].data = data;
+ tile_buffers[0][0].size = tile_size;
+ } else {
+ const uint8_t *tile_col_data_end[MAX_TILE_COLS];
+ const uint8_t *const data_start = data;
+
+ const int dec_tile_row = VPXMIN(pbi->dec_tile_row, tile_rows);
+ const int single_row = pbi->dec_tile_row >= 0;
+ const int tile_rows_start = single_row ? dec_tile_row : 0;
+ const int tile_rows_end = single_row ? tile_rows_start + 1 : tile_rows;
+ const int dec_tile_col = VPXMIN(pbi->dec_tile_col, tile_cols);
+ const int single_col = pbi->dec_tile_col >= 0;
+ const int tile_cols_start = single_col ? dec_tile_col : 0;
+ const int tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
+
+ const int tile_col_size_bytes = pbi->tile_col_size_bytes;
+ const int tile_size_bytes = pbi->tile_size_bytes;
+
+ size_t tile_col_size;
+ int r, c;
+
+ // Read tile column sizes
+ for (c = 0; c < tile_cols_end; ++c) {
+ const int is_last = c == tile_cols - 1;
+ if (!is_last) {
+ tile_col_size = mem_get_varsize(data, tile_col_size_bytes);
+ data += tile_col_size_bytes;
+ tile_col_data_end[c] = data + tile_col_size;
+ } else {
+ tile_col_size = data_end - data;
+ tile_col_data_end[c] = data_end;
+ }
+ data += tile_col_size;
+ }
+
+ data = data_start;
+
+ // Read tile sizes
+ for (c = tile_cols_start; c < tile_cols_end; ++c) {
+ if (c > 0)
+ data = tile_col_data_end[c - 1];
+
+ if (c < tile_cols - 1)
+ data += tile_col_size_bytes;
+
+ for (r = 0; r < tile_rows_end; ++r) {
+ tile_buffers[r][c].col = c;
+
+ get_tile_buffer(tile_col_data_end[c],
+ &pbi->common.error, &data,
+ pbi->decrypt_cb, pbi->decrypt_state,
+ tile_buffers, tile_size_bytes, c, r);
+ }
+ }
+ }
+}
+#else
// Reads the next tile returning its size and adjusting '*data' accordingly
// based on 'is_last'.
static void get_tile_buffer(const uint8_t *const data_end,
- const int tile_sz_mag, int is_last,
+ const int tile_size_bytes, int is_last,
struct vpx_internal_error_info *error_info,
const uint8_t **data,
vpx_decrypt_cb decrypt_cb, void *decrypt_state,
- TileBuffer *buf) {
+ TileBufferDec *const buf) {
size_t size;
if (!is_last) {
if (decrypt_cb) {
uint8_t be_data[4];
- decrypt_cb(decrypt_state, *data, be_data, tile_sz_mag + 1);
- size = mem_get_varsize(be_data, tile_sz_mag) + 1;
+ decrypt_cb(decrypt_state, *data, be_data, tile_size_bytes);
+ size = mem_get_varsize(be_data, tile_size_bytes);
} else {
- size = mem_get_varsize(*data, tile_sz_mag) + 1;
+ size = mem_get_varsize(*data, tile_size_bytes);
}
- *data += tile_sz_mag + 1;
+ *data += tile_size_bytes;
if (size > (size_t)(data_end - *data))
vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
*data += size;
}
-static void get_tile_buffers(VP10Decoder *pbi,
- const uint8_t *data, const uint8_t *data_end,
- int tile_cols, int tile_rows,
- TileBuffer (*tile_buffers)[1 << 6]) {
+static void get_tile_buffers(
+ VP10Decoder *pbi,
+ const uint8_t *data, const uint8_t *data_end,
+ TileBufferDec (*const tile_buffers)[MAX_TILE_COLS]) {
+ VP10_COMMON *const cm = &pbi->common;
int r, c;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
for (r = 0; r < tile_rows; ++r) {
for (c = 0; c < tile_cols; ++c) {
const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1);
- TileBuffer *const buf = &tile_buffers[r][c];
+ TileBufferDec *const buf = &tile_buffers[r][c];
buf->col = c;
- get_tile_buffer(data_end, pbi->common.tile_sz_mag,
- is_last, &pbi->common.error, &data,
+ get_tile_buffer(data_end, pbi->tile_size_bytes,
+ is_last, &cm->error, &data,
pbi->decrypt_cb, pbi->decrypt_state, buf);
}
}
}
+#endif // CONFIG_EXT_TILE
static const uint8_t *decode_tiles(VP10Decoder *pbi,
const uint8_t *data,
const uint8_t *data_end) {
VP10_COMMON *const cm = &pbi->common;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
- const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int tile_rows = 1 << cm->log2_tile_rows;
- TileBuffer tile_buffers[4][1 << 6];
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+ TileBufferDec (*const tile_buffers)[MAX_TILE_COLS] = pbi->tile_buffers;
+#if CONFIG_EXT_TILE
+ const int dec_tile_row = VPXMIN(pbi->dec_tile_row, tile_rows);
+ const int single_row = pbi->dec_tile_row >= 0;
+ const int tile_rows_start = single_row ? dec_tile_row : 0;
+ const int tile_rows_end = single_row ? dec_tile_row + 1 : tile_rows;
+ const int dec_tile_col = VPXMIN(pbi->dec_tile_col, tile_cols);
+ const int single_col = pbi->dec_tile_col >= 0;
+ const int tile_cols_start = single_col ? dec_tile_col : 0;
+ const int tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
+ const int inv_col_order = pbi->inv_tile_order && !single_col;
+#else
+ const int tile_rows_start = 0;
+ const int tile_rows_end = tile_rows;
+ const int tile_cols_start = 0;
+ const int tile_cols_end = tile_cols;
+ const int inv_col_order = pbi->inv_tile_order;
+#endif // CONFIG_EXT_TILE
int tile_row, tile_col;
- int mi_row, mi_col;
- TileData *tile_data = NULL;
#if CONFIG_ENTROPY
cm->do_subframe_update =
pbi->mb.plane);
}
- assert(tile_rows <= 4);
- assert(tile_cols <= (1 << 6));
-
- vp10_zero_above_context(cm, 0, aligned_cols);
+ assert(tile_rows <= MAX_TILE_ROWS);
+ assert(tile_cols <= MAX_TILE_COLS);
- get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
+ get_tile_buffers(pbi, data, data_end, tile_buffers);
if (pbi->tile_data == NULL ||
- (tile_cols * tile_rows) != pbi->total_tiles) {
+ (tile_cols * tile_rows) != pbi->allocated_tiles) {
vpx_free(pbi->tile_data);
CHECK_MEM_ERROR(
cm,
pbi->tile_data,
vpx_memalign(32, tile_cols * tile_rows * (sizeof(*pbi->tile_data))));
- pbi->total_tiles = tile_rows * tile_cols;
+ pbi->allocated_tiles = tile_rows * tile_cols;
}
// Load all tile information into tile_data.
- for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
- for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- const TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
-
- tile_data = pbi->tile_data + tile_cols * tile_row + tile_col;
- tile_data->cm = cm;
- tile_data->xd = pbi->mb;
- tile_data->xd.corrupted = 0;
- tile_data->xd.counts =
+ for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
+ for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
+ const TileBufferDec *const buf = &tile_buffers[tile_row][tile_col];
+ TileData *const td = pbi->tile_data + tile_cols * tile_row + tile_col;
+
+ td->cm = cm;
+ td->xd = pbi->mb;
+ td->xd.corrupted = 0;
+ td->xd.counts =
cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD ?
&cm->counts : NULL;
- vp10_zero(tile_data->dqcoeff);
- vp10_tile_init(&tile_data->xd.tile, tile_data->cm, tile_row, tile_col);
-#if !CONFIG_ANS
- setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
- &tile_data->bit_reader, pbi->decrypt_cb,
- pbi->decrypt_state);
-#else
- if (buf->size < 3 || !read_is_valid(buf->data, buf->size, data_end))
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Truncated packet or corrupt tile length");
+ vp10_zero(td->dqcoeff);
+ vp10_tile_init(&td->xd.tile, td->cm, tile_row, tile_col);
setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
- &tile_data->bit_reader, pbi->decrypt_cb,
- pbi->decrypt_state);
+ &td->bit_reader,
+ pbi->decrypt_cb, pbi->decrypt_state);
+#if CONFIG_ANS
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
- &tile_data->token_ans, pbi->decrypt_cb,
- pbi->decrypt_state);
-#endif
- vp10_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff);
- tile_data->xd.plane[0].color_index_map = tile_data->color_index_map[0];
- tile_data->xd.plane[1].color_index_map = tile_data->color_index_map[1];
+ &td->token_ans,
+ pbi->decrypt_cb, pbi->decrypt_state);
+#endif // CONFIG_ANS
+ vp10_init_macroblockd(cm, &td->xd, td->dqcoeff);
+ td->xd.plane[0].color_index_map = td->color_index_map[0];
+ td->xd.plane[1].color_index_map = td->color_index_map[1];
}
}
- for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
- TileInfo tile;
- vp10_tile_set_row(&tile, cm, tile_row);
- for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end;
- mi_row += MI_BLOCK_SIZE) {
- for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- const int col = pbi->inv_tile_order ?
- tile_cols - tile_col - 1 : tile_col;
- tile_data = pbi->tile_data + tile_cols * tile_row + col;
- vp10_tile_set_col(&tile, tile_data->cm, col);
- vp10_zero_left_context(&tile_data->xd);
- for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
+ for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
+ int mi_row = 0;
+ TileInfo tile_info;
+
+ vp10_tile_set_row(&tile_info, cm, tile_row);
+
+ for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
+ const int col = inv_col_order ? tile_cols - 1 - tile_col : tile_col;
+ TileData *const td = pbi->tile_data + tile_cols * tile_row + col;
+
+ vp10_tile_set_col(&tile_info, cm, col);
+
+ vp10_zero_above_context(cm, tile_info.mi_col_start, tile_info.mi_col_end);
+
+ for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
+ mi_row += MI_BLOCK_SIZE) {
+ int mi_col;
+
+ vp10_zero_left_context(&td->xd);
+
+ for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- decode_partition(pbi, &tile_data->xd,
+ decode_partition(pbi, &td->xd,
#if CONFIG_SUPERTX
0,
-#endif
- mi_row, mi_col, &tile_data->bit_reader,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, &td->bit_reader,
#if CONFIG_ANS
- &tile_data->token_ans,
+ &td->token_ans,
#endif // CONFIG_ANS
BLOCK_64X64, 4);
}
- pbi->mb.corrupted |= tile_data->xd.corrupted;
+ pbi->mb.corrupted |= td->xd.corrupted;
if (pbi->mb.corrupted)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Failed to decode tile data");
}
#endif // CONFIG_ENTROPY
}
+ }
+
+ assert(mi_row > 0);
+
#if !CONFIG_VAR_TX
- // Loopfilter one row.
- if (cm->lf.filter_level && !cm->skip_loop_filter) {
- const int lf_start = mi_row - MI_BLOCK_SIZE;
- LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
-
- // delay the loopfilter by 1 macroblock row.
- if (lf_start < 0) continue;
-
- // decoding has completed: finish up the loop filter in this thread.
- if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue;
-
- winterface->sync(&pbi->lf_worker);
- lf_data->start = lf_start;
- lf_data->stop = mi_row;
- if (pbi->max_threads > 1) {
- winterface->launch(&pbi->lf_worker);
- } else {
- winterface->execute(&pbi->lf_worker);
- }
+ // Loopfilter one tile row.
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+ const int lf_start = VPXMAX(0, tile_info.mi_row_start - MI_BLOCK_SIZE);
+ const int lf_end = tile_info.mi_row_end - MI_BLOCK_SIZE;
+
+ // Delay the loopfilter if the first tile row is only
+ // a single superblock high.
+ if (lf_end <= 0)
+ continue;
+
+ // Decoding has completed. Finish up the loop filter in this thread.
+ if (tile_info.mi_row_end >= cm->mi_rows)
+ continue;
+
+ winterface->sync(&pbi->lf_worker);
+ lf_data->start = lf_start;
+ lf_data->stop = lf_end;
+ if (pbi->max_threads > 1) {
+ winterface->launch(&pbi->lf_worker);
+ } else {
+ winterface->execute(&pbi->lf_worker);
}
- // After loopfiltering, the last 7 row pixels in each superblock row may
- // still be changed by the longest loopfilter of the next superblock
- // row.
- if (cm->frame_parallel_decode)
- vp10_frameworker_broadcast(pbi->cur_buf,
- mi_row << MI_BLOCK_SIZE_LOG2);
-#endif
}
+
+ // After loopfiltering, the last 7 row pixels in each superblock row may
+ // still be changed by the longest loopfilter of the next superblock row.
+ if (cm->frame_parallel_decode)
+ vp10_frameworker_broadcast(pbi->cur_buf, mi_row << MI_BLOCK_SIZE_LOG2);
+#endif // !CONFIG_VAR_TX
}
- // Loopfilter remaining rows in the frame.
#if CONFIG_VAR_TX
+ // Loopfilter the whole frame.
vp10_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
cm->lf.filter_level, 0, 0);
#else
+ // Loopfilter remaining rows in the frame.
if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
winterface->sync(&pbi->lf_worker);
lf_data->stop = cm->mi_rows;
winterface->execute(&pbi->lf_worker);
}
-#endif
-
- // Get last tile data.
- tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
+#endif // CONFIG_VAR_TX
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX);
-#if CONFIG_ANS
+
+#if CONFIG_ANS || CONFIG_EXT_TILE
return data_end;
#else
- return vpx_reader_find_end(&tile_data->bit_reader);
-#endif
+ {
+ // Get last tile data.
+ TileData *const td = pbi->tile_data + tile_cols * tile_rows - 1;
+ return vpx_reader_find_end(&td->bit_reader);
+ }
+#endif // CONFIG_ANS || CONFIG_EXT_TILE
}
static int tile_worker_hook(TileWorkerData *const tile_data,
const TileInfo *const tile) {
+ VP10Decoder *const pbi = tile_data->pbi;
int mi_row, mi_col;
if (setjmp(tile_data->error_info.jmp)) {
tile_data->error_info.setjmp = 1;
tile_data->xd.error_info = &tile_data->error_info;
+ vp10_zero_above_context(&pbi->common, tile->mi_col_start, tile->mi_col_end);
+
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
vp10_zero_left_context(&tile_data->xd);
+
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- decode_partition(tile_data->pbi, &tile_data->xd,
+ decode_partition(pbi, &tile_data->xd,
#if CONFIG_SUPERTX
0,
#endif
// sorts in descending order
static int compare_tile_buffers(const void *a, const void *b) {
- const TileBuffer *const buf1 = (const TileBuffer*)a;
- const TileBuffer *const buf2 = (const TileBuffer*)b;
+ const TileBufferDec *const buf1 = (const TileBufferDec*)a;
+ const TileBufferDec *const buf2 = (const TileBufferDec*)b;
return (int)(buf2->size - buf1->size);
}
const uint8_t *data_end) {
VP10_COMMON *const cm = &pbi->common;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
- const uint8_t *bit_reader_end = NULL;
- const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int tile_rows = 1 << cm->log2_tile_rows;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
const int num_workers = VPXMIN(pbi->max_threads & ~1, tile_cols);
- TileBuffer tile_buffers[1][1 << 6];
- int n;
+ TileBufferDec (*const tile_buffers)[MAX_TILE_COLS] = pbi->tile_buffers;
+#if CONFIG_EXT_TILE
+ const int dec_tile_row = VPXMIN(pbi->dec_tile_row, tile_rows);
+ const int single_row = pbi->dec_tile_row >= 0;
+ const int tile_rows_start = single_row ? dec_tile_row : 0;
+ const int tile_rows_end = single_row ? dec_tile_row + 1 : tile_rows;
+ const int dec_tile_col = VPXMIN(pbi->dec_tile_col, tile_cols);
+ const int single_col = pbi->dec_tile_col >= 0;
+ const int tile_cols_start = single_col ? dec_tile_col : 0;
+ const int tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
+#else
+ const int tile_rows_start = 0;
+ const int tile_rows_end = tile_rows;
+ const int tile_cols_start = 0;
+ const int tile_cols_end = tile_cols;
+#endif // CONFIG_EXT_TILE
+ int tile_row, tile_col;
+ int i;
+
+#if !(CONFIG_ANS || CONFIG_EXT_TILE)
int final_worker = -1;
+#endif // !(CONFIG_ANS || CONFIG_EXT_TILE)
+
+ assert(tile_rows <= MAX_TILE_ROWS);
+ assert(tile_cols <= MAX_TILE_COLS);
- assert(tile_cols <= (1 << 6));
- assert(tile_rows == 1);
- (void)tile_rows;
#if CONFIG_ANS
+ // TODO(any): This might just work now. Needs to be tested.
abort(); // FIXME: Tile parsing broken
-#endif
+#endif // CONFIG_ANS
// TODO(jzern): See if we can remove the restriction of passing in max
// threads to the decoder.
if (pbi->num_tile_workers == 0) {
const int num_threads = pbi->max_threads & ~1;
- int i;
CHECK_MEM_ERROR(cm, pbi->tile_workers,
vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
// Ensure tile data offsets will be properly aligned. This may fail on
}
// Reset tile decoding hook
- for (n = 0; n < num_workers; ++n) {
- VPxWorker *const worker = &pbi->tile_workers[n];
+ for (i = 0; i < num_workers; ++i) {
+ VPxWorker *const worker = &pbi->tile_workers[i];
winterface->sync(worker);
worker->hook = (VPxWorkerHook)tile_worker_hook;
- worker->data1 = &pbi->tile_worker_data[n];
- worker->data2 = &pbi->tile_worker_info[n];
- }
-
- vp10_zero_above_context(cm, 0, aligned_mi_cols);
-
- // Load tile data into tile_buffers
- get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
-
- // Sort the buffers based on size in descending order.
- qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]),
- compare_tile_buffers);
-
- // Rearrange the tile buffers such that per-tile group the largest, and
- // presumably the most difficult, tile will be decoded in the main thread.
- // This should help minimize the number of instances where the main thread is
- // waiting for a worker to complete.
- {
- int group_start = 0;
- while (group_start < tile_cols) {
- const TileBuffer largest = tile_buffers[0][group_start];
- const int group_end = VPXMIN(group_start + num_workers, tile_cols) - 1;
- memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1,
- (group_end - group_start) * sizeof(tile_buffers[0][0]));
- tile_buffers[0][group_end] = largest;
- group_start = group_end + 1;
- }
+ worker->data1 = &pbi->tile_worker_data[i];
+ worker->data2 = &pbi->tile_worker_info[i];
}
// Initialize thread frame counts.
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- int i;
-
for (i = 0; i < num_workers; ++i) {
- TileWorkerData *const tile_data =
- (TileWorkerData*)pbi->tile_workers[i].data1;
- vp10_zero(tile_data->counts);
+ TileWorkerData *const twd = (TileWorkerData*)pbi->tile_workers[i].data1;
+ vp10_zero(twd->counts);
}
}
- n = 0;
- while (n < tile_cols) {
- int i;
- for (i = 0; i < num_workers && n < tile_cols; ++i) {
- VPxWorker *const worker = &pbi->tile_workers[i];
- TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
- TileInfo *const tile = (TileInfo*)worker->data2;
- TileBuffer *const buf = &tile_buffers[0][n];
-
- tile_data->pbi = pbi;
- tile_data->xd = pbi->mb;
- tile_data->xd.corrupted = 0;
- tile_data->xd.counts =
- cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD ?
- &tile_data->counts : NULL;
- vp10_zero(tile_data->dqcoeff);
- vp10_tile_init(tile, cm, 0, buf->col);
- vp10_tile_init(&tile_data->xd.tile, cm, 0, buf->col);
- setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
- &tile_data->bit_reader, pbi->decrypt_cb,
- pbi->decrypt_state);
- vp10_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff);
- tile_data->xd.plane[0].color_index_map = tile_data->color_index_map[0];
- tile_data->xd.plane[1].color_index_map = tile_data->color_index_map[1];
-
- worker->had_error = 0;
- if (i == num_workers - 1 || n == tile_cols - 1) {
- winterface->execute(worker);
- } else {
- winterface->launch(worker);
+ // Load tile data into tile_buffers
+ get_tile_buffers(pbi, data, data_end, tile_buffers);
+
+ for (tile_row = tile_rows_start ; tile_row < tile_rows_end ; ++tile_row) {
+ // Sort the buffers in this tile row based on size in descending order.
+ qsort(&tile_buffers[tile_row][tile_cols_start],
+ tile_cols_end - tile_cols_start, sizeof(tile_buffers[0][0]),
+ compare_tile_buffers);
+
+ // Rearrange the tile buffers in this tile row such that per-tile group
+ // the largest, and presumably the most difficult tile will be decoded in
+ // the main thread. This should help minimize the number of instances
+ // where the main thread is waiting for a worker to complete.
+ {
+ int group_start;
+ for (group_start = tile_cols_start ; group_start < tile_cols_end ;
+ group_start += num_workers) {
+ const int group_end = VPXMIN(group_start + num_workers, tile_cols);
+ const TileBufferDec largest = tile_buffers[tile_row][group_start];
+ memmove(&tile_buffers[tile_row][group_start],
+ &tile_buffers[tile_row][group_start + 1],
+ (group_end - group_start - 1) * sizeof(tile_buffers[0][0]));
+ tile_buffers[tile_row][group_end - 1] = largest;
}
+ }
- if (buf->col == tile_cols - 1) {
- final_worker = i;
- }
+ for (tile_col = tile_cols_start ; tile_col < tile_cols_end ; ) {
+ // Launch workers for individual columns
+ for (i = 0; i < num_workers && tile_col < tile_cols_end;
+ ++i, ++tile_col) {
+ TileBufferDec *const buf = &tile_buffers[tile_row][tile_col];
+ VPxWorker *const worker = &pbi->tile_workers[i];
+ TileWorkerData *const twd = (TileWorkerData*)worker->data1;
+ TileInfo *const tile_info = (TileInfo*)worker->data2;
+
+ twd->pbi = pbi;
+ twd->xd = pbi->mb;
+ twd->xd.corrupted = 0;
+ twd->xd.counts =
+ cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD ?
+ &twd->counts : NULL;
+ vp10_zero(twd->dqcoeff);
+ vp10_tile_init(tile_info, cm, tile_row, buf->col);
+ vp10_tile_init(&twd->xd.tile, cm, tile_row, buf->col);
+ setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
+ &twd->bit_reader,
+ pbi->decrypt_cb, pbi->decrypt_state);
+#if CONFIG_ANS
+ setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
+ &twd->token_ans, pbi->decrypt_cb,
+ pbi->decrypt_state);
+#endif // CONFIG_ANS
+ vp10_init_macroblockd(cm, &twd->xd, twd->dqcoeff);
+ twd->xd.plane[0].color_index_map = twd->color_index_map[0];
+ twd->xd.plane[1].color_index_map = twd->color_index_map[1];
- ++n;
- }
+ worker->had_error = 0;
+ if (i == num_workers - 1 || tile_col == tile_cols_end - 1) {
+ winterface->execute(worker);
+ } else {
+ winterface->launch(worker);
+ }
- for (; i > 0; --i) {
- VPxWorker *const worker = &pbi->tile_workers[i - 1];
- // TODO(jzern): The tile may have specific error data associated with
- // its vpx_internal_error_info which could be propagated to the main info
- // in cm. Additionally once the threads have been synced and an error is
- // detected, there's no point in continuing to decode tiles.
- pbi->mb.corrupted |= !winterface->sync(worker);
- }
- if (final_worker > -1) {
- TileWorkerData *const tile_data =
- (TileWorkerData*)pbi->tile_workers[final_worker].data1;
- bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader);
- final_worker = -1;
- }
+#if !(CONFIG_ANS || CONFIG_EXT_TILE)
+ if (tile_row == tile_rows - 1 && buf->col == tile_cols - 1) {
+ final_worker = i;
+ }
+#endif // !(CONFIG_ANS || CONFIG_EXT_TILE)
+ }
- // Accumulate thread frame counts.
- if (n >= tile_cols &&
- cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- for (i = 0; i < num_workers; ++i) {
- TileWorkerData *const tile_data =
- (TileWorkerData*)pbi->tile_workers[i].data1;
- vp10_accumulate_frame_counts(cm, &tile_data->counts, 1);
+ // Sync all workers
+ for (; i > 0; --i) {
+ VPxWorker *const worker = &pbi->tile_workers[i - 1];
+ // TODO(jzern): The tile may have specific error data associated with
+ // its vpx_internal_error_info which could be propagated to the main
+ // info in cm. Additionally once the threads have been synced and an
+ // error is detected, there's no point in continuing to decode tiles.
+ pbi->mb.corrupted |= !winterface->sync(worker);
}
}
}
- return bit_reader_end;
+ // Accumulate thread frame counts.
+ if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+ for (i = 0; i < num_workers; ++i) {
+ TileWorkerData *const twd = (TileWorkerData*)pbi->tile_workers[i].data1;
+ vp10_accumulate_frame_counts(cm, &twd->counts, 1);
+ }
+ }
+
+#if CONFIG_ANS || CONFIG_EXT_TILE
+ return data_end;
+#else
+ assert(final_worker != -1);
+ {
+ TileWorkerData *const twd =
+ (TileWorkerData*)pbi->tile_workers[final_worker].data1;
+ return vpx_reader_find_end(&twd->bit_reader);
+ }
+#endif // CONFIG_ANS || CONFIG_EXT_TILE
}
static void error_handler(void *data) {
: read_tx_mode(rb);
cm->reference_mode = read_frame_reference_mode(cm, rb);
- setup_tile_info(cm, rb);
+ setup_tile_info(pbi, rb);
sz = vpx_rb_read_literal(rb, 16);
if (sz == 0)
uint8_t clear_data[MAX_VP9_HEADER_SIZE];
const size_t first_partition_size = read_uncompressed_header(pbi,
init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
- const int tile_rows = 1 << cm->log2_tile_rows;
- const int tile_cols = 1 << cm->log2_tile_cols;
YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
xd->cur_buf = new_fb;
cm->coef_probs_update_idx = 0;
#endif // CONFIG_ENTROPY
- if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
+ if (pbi->max_threads > 1
+#if CONFIG_EXT_TILE
+ && pbi->dec_tile_col < 0 // Decoding all columns
+#endif // CONFIG_EXT_TILE
+ && cm->tile_cols > 1) {
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
if (!xd->corrupted) {
struct vpx_internal_error_info error_info;
} TileWorkerData;
+typedef struct TileBufferDec {
+ const uint8_t *data;
+ size_t size;
+ int col; // only used with multi-threaded decoding
+} TileBufferDec;
+
typedef struct VP10Decoder {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
int num_tile_workers;
TileData *tile_data;
- int total_tiles;
+ int allocated_tiles;
+
+ TileBufferDec tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
VP9LfSync lf_row_sync;
int inv_tile_order;
int need_resync; // wait for key/intra-only frame.
int hold_ref_buf; // hold the reference buffer.
+
+ int tile_size_bytes;
+#if CONFIG_EXT_TILE
+ int tile_col_size_bytes;
+ int dec_tile_row, dec_tile_col;
+#endif // CONFIG_EXT_TILE
} VP10Decoder;
int vp10_receive_compressed_data(struct VP10Decoder *pbi,
}
#endif // CONFIG_EXT_TX
-static void pack_palette_tokens(vpx_writer *w, TOKENEXTRA **tp,
+static void pack_palette_tokens(vpx_writer *w, const TOKENEXTRA **tp,
int n, int num) {
int i;
- TOKENEXTRA *p = *tp;
+ const TOKENEXTRA *p = *tp;
for (i = 0; i < num; ++i) {
vp10_write_token(w, vp10_palette_color_tree[n - 2], p->context_tree,
#if !CONFIG_ANS
static void pack_mb_tokens(vpx_writer *w,
- TOKENEXTRA **tp, const TOKENEXTRA *const stop,
+ const TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth, const TX_SIZE tx) {
- TOKENEXTRA *p = *tp;
+ const TOKENEXTRA *p = *tp;
#if CONFIG_VAR_TX
int count = 0;
const int seg_eob = 16 << (tx << 1);
// coder.
static void pack_mb_tokens_ans(struct BufAnsCoder *ans,
const rans_dec_lut token_tab[COEFF_PROB_MODELS],
- TOKENEXTRA **tp,
+ const TOKENEXTRA **tp,
const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth,
const TX_SIZE tx) {
- TOKENEXTRA *p = *tp;
+ const TOKENEXTRA *p = *tp;
#if CONFIG_VAR_TX
int count = 0;
const int seg_eob = 16 << (tx << 1);
#if CONFIG_VAR_TX
static void pack_txb_tokens(vpx_writer *w,
- TOKENEXTRA **tp, const TOKENEXTRA *const tok_end,
+ const TOKENEXTRA **tp,
+ const TOKENEXTRA *const tok_end,
MACROBLOCKD *xd, MB_MODE_INFO *mbmi, int plane,
BLOCK_SIZE plane_bsize,
vpx_bit_depth_t bit_depth,
#if CONFIG_ANS
struct BufAnsCoder *ans,
#endif // CONFIG_ANS
- TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+ const TOKENEXTRA **tok,
+ const TOKENEXTRA *const tok_end,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, bsize)
#endif // CONFIG_ANS && CONFIG_SUPERTX
-static void write_modes_sb(VP10_COMP *cpi, const TileInfo *const tile,
- vpx_writer *w,
+static void write_modes_sb(VP10_COMP *const cpi,
+ const TileInfo *const tile,
+ vpx_writer *const w,
#if CONFIG_ANS
struct BufAnsCoder *ans,
#endif // CONFIG_ANS
- TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+ const TOKENEXTRA **tok,
+ const TOKENEXTRA *const tok_end,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
#endif // CONFIG_EXT_PARTITION_TYPES
}
-static void write_modes(VP10_COMP *cpi, const TileInfo *const tile,
- vpx_writer *w,
+static void write_modes(VP10_COMP *const cpi,
+ const TileInfo *const tile,
+ vpx_writer *const w,
#if CONFIG_ANS
struct BufAnsCoder *ans,
#endif // CONFIG_ANS
- TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
+ const TOKENEXTRA **tok,
+ const TOKENEXTRA *const tok_end) {
+ VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ const int mi_row_start = tile->mi_row_start;
+ const int mi_row_end = tile->mi_row_end;
+ const int mi_col_start = tile->mi_col_start;
+ const int mi_col_end = tile->mi_col_end;
int mi_row, mi_col;
- for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
- mi_row += MI_BLOCK_SIZE) {
+ vp10_zero_above_context(cm, mi_col_start, mi_col_end);
+
+ for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) {
vp10_zero_left_context(xd);
- for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
- mi_col += MI_BLOCK_SIZE)
- write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0, mi_row, mi_col,
- BLOCK_64X64);
+
+ for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
+ write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0,
+ mi_row, mi_col, BLOCK_64X64);
+ }
}
}
}
}
-static void write_tile_info(const VP10_COMMON *const cm,
+static void write_tile_info(VP10_COMMON *const cm,
struct vpx_write_bit_buffer *wb) {
+#if CONFIG_EXT_TILE
+ // TODO(geza.lore): Dependent on CU_SIZE
+ const int tile_width =
+ mi_cols_aligned_to_sb(cm->tile_width) >> MI_BLOCK_SIZE_LOG2;
+ const int tile_height =
+ mi_cols_aligned_to_sb(cm->tile_height) >> MI_BLOCK_SIZE_LOG2;
+
+ assert(tile_width > 0 && tile_width <= 64);
+ assert(tile_height > 0 && tile_height <= 64);
+
+ // Write the tile sizes
+ vpx_wb_write_literal(wb, tile_width - 1, 6);
+ vpx_wb_write_literal(wb, tile_height - 1, 6);
+#else
int min_log2_tile_cols, max_log2_tile_cols, ones;
vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
vpx_wb_write_bit(wb, cm->log2_tile_rows != 0);
if (cm->log2_tile_rows != 0)
vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
+#endif // CONFIG_EXT_TILE
}
static int get_refresh_mask(VP10_COMP *cpi) {
}
}
-static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr,
- unsigned int *max_tile_sz) {
+#if CONFIG_EXT_TILE
+static INLINE int find_identical_tile(
+ const int tile_row, const int tile_col,
+ TileBufferEnc (*const tile_buffers)[1024]) {
+ const MV32 candidate_offset[1] = {{1, 0}};
+ const uint8_t *const cur_tile_data =
+ tile_buffers[tile_row][tile_col].data + 4;
+ const unsigned int cur_tile_size = tile_buffers[tile_row][tile_col].size;
+
+ int i;
+
+ if (tile_row == 0)
+ return 0;
+
+ // TODO(yunqingwang): For now, only above tile is checked and used.
+ // More candidates such as left tile can be added later.
+ for (i = 0; i < 1; i++) {
+ int row_offset = candidate_offset[0].row;
+ int col_offset = candidate_offset[0].col;
+ int row = tile_row - row_offset;
+ int col = tile_col - col_offset;
+ uint8_t tile_hdr;
+ const uint8_t *tile_data;
+ TileBufferEnc *candidate;
+
+ if (row < 0 || col < 0)
+ continue;
+
+ tile_hdr = *(tile_buffers[row][col].data);
+
+ // Read out tcm bit
+ if ((tile_hdr >> 7) == 1) {
+ // The candidate is a copy tile itself
+ row_offset += tile_hdr & 0x7f;
+ row = tile_row - row_offset;
+ }
+
+ candidate = &tile_buffers[row][col];
+
+ if (row_offset >= 128 || candidate->size != cur_tile_size)
+ continue;
+
+ tile_data = candidate->data + 4;
+
+ if (memcmp(tile_data, cur_tile_data, cur_tile_size) != 0)
+ continue;
+
+ // Identical tile found
+ assert(row_offset > 0);
+ return row_offset;
+ }
+
+ // No identical tile found
+ return 0;
+}
+#endif // CONFIG_EXT_TILE
+
+static uint32_t write_tiles(VP10_COMP *const cpi,
+ uint8_t *const dst,
+ unsigned int *max_tile_size,
+ unsigned int *max_tile_col_size) {
VP10_COMMON *const cm = &cpi->common;
vpx_writer mode_bc;
#if CONFIG_ANS
struct BufAnsCoder buffered_ans;
#endif // CONFIG_ANS
int tile_row, tile_col;
- TOKENEXTRA *tok_end;
+ TOKENEXTRA *(*const tok_buffers)[MAX_TILE_COLS] = cpi->tile_tok;
+ TileBufferEnc (*const tile_buffers)[MAX_TILE_COLS] = cpi->tile_buffers;
size_t total_size = 0;
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int tile_rows = 1 << cm->log2_tile_rows;
- unsigned int max_tile = 0;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+#if CONFIG_EXT_TILE
+ const int have_tiles = tile_cols * tile_rows > 1;
+#endif // CONFIG_EXT_TILE
const int ans_window_size = get_token_alloc(cm->mb_rows, cm->mb_cols) * 3;
struct buffered_ans_symbol *uco_ans_buf =
malloc(ans_window_size * sizeof(*uco_ans_buf));
assert(uco_ans_buf);
- vp10_zero_above_context(cm, 0, mi_cols_aligned_to_sb(cm->mi_cols));
+ *max_tile_size = 0;
+ *max_tile_col_size = 0;
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- int tile_idx = tile_row * tile_cols + tile_col;
- int put_tile_size = tile_col < tile_cols - 1 || tile_row < tile_rows - 1;
- uint8_t *const mode_data_start =
- data_ptr + total_size + (put_tile_size ? 4 : 0);
- int token_section_size;
- TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
+ // All tile size fields are written using 4 bytes. A call to remux_tiles
+ // will later compact the data if smaller headers are adequate.
- tok_end = cpi->tile_tok[tile_row][tile_col] +
- cpi->tok_count[tile_row][tile_col];
+#if CONFIG_EXT_TILE
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileInfo tile_info;
+ const int is_last_col = (tile_col == tile_cols - 1);
+ const size_t col_offset = total_size;
- vpx_start_encode(&mode_bc, mode_data_start);
+ vp10_tile_set_col(&tile_info, cm, tile_col);
+
+ // The last column does not have a column header
+ if (!is_last_col)
+ total_size += 4;
+
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
+ unsigned int tile_size;
+ const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
+
+ vp10_tile_set_row(&tile_info, cm, tile_row);
+
+ buf->data = dst + total_size;
+
+ // If CONFIG_EXT_TILE = 1, every tile in the row has a header,
+ // even for the last one, unless no tiling is used at all.
+ if (have_tiles) {
+ total_size += 4;
+ vpx_start_encode(&mode_bc, buf->data + 4);
+ } else {
+ vpx_start_encode(&mode_bc, buf->data);
+ }
#if !CONFIG_ANS
- (void) token_section_size;
- write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &mode_bc, &tok,
- tok_end);
+ write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
assert(tok == tok_end);
vpx_stop_encode(&mode_bc);
- if (put_tile_size) {
- unsigned int tile_sz;
+ tile_size = mode_bc.pos;
+#else
+ buf_ans_write_init(&buffered_ans, uco_ans_buf, ans_window_size);
+ write_modes(cpi, &tile_info, &mode_bc, &buffered_ans, &tok, tok_end);
+ assert(tok == tok_end);
+ vpx_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
- // size of this tile
- assert(mode_bc.pos > 0);
- tile_sz = mode_bc.pos - 1;
- mem_put_le32(data_ptr + total_size, tile_sz);
- max_tile = max_tile > tile_sz ? max_tile : tile_sz;
- total_size += 4;
+ ans_write_init(&token_ans, dst + total_size + tile_size);
+ buf_ans_flush(&buffered_ans, &token_ans);
+ tile_size += ans_write_end(&token_ans);
+#endif // !CONFIG_ANS
+
+ buf->size = tile_size;
+
+ // Record the maximum tile size we see, so we can compact headers later.
+ *max_tile_size = VPXMAX(*max_tile_size, tile_size);
+
+ if (have_tiles) {
+ // tile header: size of this tile, or copy offset
+ uint32_t tile_header = tile_size;
+
+ // Check if this tile is a copy tile.
+ // Copy tiles are unlikely on key frames, so skip the search there
+ // to avoid unnecessary work.
+ if (cm->frame_type != KEY_FRAME) {
+ const int idendical_tile_offset =
+ find_identical_tile(tile_row, tile_col, tile_buffers);
+
+ if (idendical_tile_offset > 0) {
+ tile_size = 0;
+ tile_header = idendical_tile_offset | 0x80;
+ tile_header <<= 24;
+ }
+ }
+
+ mem_put_le32(buf->data, tile_header);
}
- total_size += mode_bc.pos;
+
+ total_size += tile_size;
+ }
+
+ if (!is_last_col) {
+ size_t col_size = total_size - col_offset - 4;
+ mem_put_le32(dst + col_offset, col_size);
+
+ // Record the maximum tile column size we see, so we can compact
+ // the tile column size headers later.
+ *max_tile_col_size = VPXMAX(*max_tile_col_size, col_size);
+ }
+ }
+#else
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ TileInfo tile_info;
+ const int is_last_row = (tile_row == tile_rows - 1);
+
+ vp10_tile_set_row(&tile_info, cm, tile_row);
+
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
+ const int is_last_col = (tile_col == tile_cols - 1);
+ const int is_last_tile = is_last_col && is_last_row;
+ unsigned int tile_size;
+ const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
+
+ vp10_tile_set_col(&tile_info, cm, tile_col);
+
+ buf->data = dst + total_size;
+
+ // The last tile does not have a header.
+ if (!is_last_tile)
+ total_size += 4;
+
+ vpx_start_encode(&mode_bc, dst + total_size);
+
+#if !CONFIG_ANS
+ write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
+ assert(tok == tok_end);
+ vpx_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
#else
buf_ans_write_init(&buffered_ans, uco_ans_buf, ans_window_size);
- write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &mode_bc,
- &buffered_ans, &tok, tok_end);
+ write_modes(cpi, &tile_info, &mode_bc, &buffered_ans, &tok, tok_end);
assert(tok == tok_end);
vpx_stop_encode(&mode_bc);
- ans_write_init(&token_ans, mode_data_start + mode_bc.pos);
+ tile_size = mode_bc.pos;
+
+ ans_write_init(&token_ans, dst + total_size + tile_size);
buf_ans_flush(&buffered_ans, &token_ans);
- token_section_size = ans_write_end(&token_ans);
- if (put_tile_size) {
+ tile_size += ans_write_end(&token_ans);
+#endif // !CONFIG_ANS
+
+ assert(tile_size > 0);
+
+ buf->size = tile_size;
+
+ if (!is_last_tile) {
+ *max_tile_size = VPXMAX(*max_tile_size, tile_size);
// size of this tile
- mem_put_be32(data_ptr + total_size,
- 4 + mode_bc.pos + token_section_size);
- total_size += 4;
+ mem_put_le32(buf->data, tile_size);
}
- total_size += mode_bc.pos + token_section_size;
-#endif // !CONFIG_ANS
+
+ total_size += tile_size;
}
}
- *max_tile_sz = max_tile;
+#endif // CONFIG_EXT_TILE
#if CONFIG_ANS
free(uco_ans_buf);
write_tile_info(cm, wb);
}
-static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
+static uint32_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
VP10_COMMON *const cm = &cpi->common;
#if CONFIG_SUPERTX
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
return header_bc.pos;
}
-static int remux_tiles(uint8_t *dest, const int sz,
- const int n_tiles, const int mag) {
- int rpos = 0, wpos = 0, n;
+static int choose_size_bytes(uint32_t size, int spare_msbs) {
+ // Choose the number of bytes required to represent size, without
+ // using the 'spare_msbs' number of most significant bits.
- for (n = 0; n < n_tiles; n++) {
- int tile_sz;
+ // Make sure we will fit in 4 bytes to start with..
+ if (spare_msbs > 0 && size >> (32 - spare_msbs) != 0)
+ return -1;
- if (n == n_tiles - 1) {
- tile_sz = sz - rpos;
- } else {
- tile_sz = mem_get_le32(&dest[rpos]) + 1;
- rpos += 4;
- switch (mag) {
- case 0:
- dest[wpos] = tile_sz - 1;
- break;
- case 1:
- mem_put_le16(&dest[wpos], tile_sz - 1);
- break;
- case 2:
- mem_put_le24(&dest[wpos], tile_sz - 1);
- break;
- case 3: // remuxing should only happen if mag < 3
- default:
- assert("Invalid value for tile size magnitude" && 0);
+ // Normalise to 32 bits
+ size <<= spare_msbs;
+
+ if (size >> 24 != 0)
+ return 4;
+ else if (size >> 16 != 0)
+ return 3;
+ else if (size >> 8 != 0)
+ return 2;
+ else
+ return 1;
+}
+
+static void mem_put_varsize(uint8_t *const dst, const int sz, const int val) {
+ switch (sz) {
+ case 1:
+ dst[0] = (uint8_t)(val & 0xff);
+ break;
+ case 2:
+ mem_put_le16(dst, val);
+ break;
+ case 3:
+ mem_put_le24(dst, val);
+ break;
+ case 4:
+ mem_put_le32(dst, val);
+ break;
+ default:
+ assert("Invalid size" && 0);
+ break;
+ }
+}
+
+static int remux_tiles(const VP10_COMMON *const cm,
+ uint8_t *dst,
+ const uint32_t data_size,
+ const uint32_t max_tile_size,
+ const uint32_t max_tile_col_size,
+ int *const tile_size_bytes,
+ int *const tile_col_size_bytes) {
+ // Choose the tile size bytes (tsb) and tile column size bytes (tcsb)
+#if CONFIG_EXT_TILE
+ // The top bit in the tile size field indicates tile copy mode, so we
+ // have 1 less bit to code the tile size
+ const int tsb = choose_size_bytes(max_tile_size, 1);
+ const int tcsb = choose_size_bytes(max_tile_col_size, 0);
+#else
+ const int tsb = choose_size_bytes(max_tile_size, 0);
+ const int tcsb = 4; // This is ignored
+ (void) max_tile_col_size;
+#endif // CONFIG_EXT_TILE
+
+ assert(tsb > 0);
+ assert(tcsb > 0);
+
+ *tile_size_bytes = tsb;
+ *tile_col_size_bytes = tcsb;
+
+ if (tsb == 4 && tcsb == 4) {
+ return data_size;
+ } else {
+ uint32_t wpos = 0;
+ uint32_t rpos = 0;
+
+#if CONFIG_EXT_TILE
+ int tile_row;
+ int tile_col;
+
+ for (tile_col = 0 ; tile_col < cm->tile_cols ; tile_col++) {
+ // All but the last column has a column header
+ if (tile_col < cm->tile_cols - 1) {
+ uint32_t tile_col_size = mem_get_le32(dst + rpos);
+ rpos += 4;
+
+ // Adjust the tile column size by the number of bytes removed
+ // from the tile size fields.
+ tile_col_size -= (4-tsb) * cm->tile_rows;
+
+ mem_put_varsize(dst + wpos, tcsb, tile_col_size);
+ wpos += tcsb;
+ }
+
+ for (tile_row = 0 ; tile_row < cm->tile_rows ; tile_row++) {
+ // All rows, including the last, have a tile header
+ uint32_t tile_header = mem_get_le32(dst + rpos);
+ rpos += 4;
+
+ // If this is a copy tile, we need to shift the MSB to the
+ // top bit of the new width, and there is no data to copy.
+ if (tile_header >> 31 != 0) {
+ if (tsb < 4)
+ tile_header >>= 32 - 8 * tsb;
+ mem_put_varsize(dst + wpos, tsb, tile_header);
+ wpos += tsb;
+ } else {
+ mem_put_varsize(dst + wpos, tsb, tile_header);
+ wpos += tsb;
+
+ memmove(dst + wpos, dst + rpos, tile_header);
+ rpos += tile_header;
+ wpos += tile_header;
+ }
}
- wpos += mag + 1;
}
+#else
+ const int n_tiles = cm->tile_cols * cm->tile_rows;
+ int n;
- memmove(&dest[wpos], &dest[rpos], tile_sz);
- wpos += tile_sz;
- rpos += tile_sz;
- }
+ for (n = 0; n < n_tiles; n++) {
+ int tile_size;
- assert(rpos > wpos);
- assert(rpos == sz);
+ if (n == n_tiles - 1) {
+ tile_size = data_size - rpos;
+ } else {
+ tile_size = mem_get_le32(dst + rpos);
+ rpos += 4;
+ mem_put_varsize(dst + wpos, tsb, tile_size);
+ wpos += tsb;
+ }
+
+ memmove(dst + wpos, dst + rpos, tile_size);
+
+ rpos += tile_size;
+ wpos += tile_size;
+ }
+#endif // CONFIG_EXT_TILE
+
+ assert(rpos > wpos);
+ assert(rpos == data_size);
- return wpos;
+ return wpos;
+ }
}
-void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size) {
- uint8_t *data = dest;
- size_t first_part_size, uncompressed_hdr_size, data_sz;
+void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dst, size_t *size) {
+ uint8_t *data = dst;
+ uint32_t compressed_header_size;
+ uint32_t uncompressed_header_size;
+ uint32_t data_size;
struct vpx_write_bit_buffer wb = {data, 0};
struct vpx_write_bit_buffer saved_wb;
- unsigned int max_tile;
+ unsigned int max_tile_size;
+ unsigned int max_tile_col_size;
+ int tile_size_bytes;
+ int tile_col_size_bytes;
+
VP10_COMMON *const cm = &cpi->common;
- const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
- const int have_tiles = n_log2_tiles > 0;
+ const int have_tiles = cm->tile_cols * cm->tile_rows > 1;
+ // Write the uncompressed header
write_uncompressed_header(cpi, &wb);
- saved_wb = wb;
- // don't know in advance first part. size
- vpx_wb_write_literal(&wb, 0, 16 + have_tiles * 2);
- uncompressed_hdr_size = vpx_wb_bytes_written(&wb);
- data += uncompressed_hdr_size;
+ // We do not know these in advance. Output placeholder bit.
+ saved_wb = wb;
+ // Write tile size magnitudes
+ if (have_tiles) {
+ // Note that the last item in the uncompressed header is the data
+ // describing tile configuration.
+#if CONFIG_EXT_TILE
+ // Number of bytes in tile column size - 1
+ vpx_wb_write_literal(&wb, 0, 2);
+#endif // CONFIG_EXT_TILE
+ // Number of bytes in tile size - 1
+ vpx_wb_write_literal(&wb, 0, 2);
+ }
+ // Size of compressed header
+ vpx_wb_write_literal(&wb, 0, 16);
+
+ uncompressed_header_size = vpx_wb_bytes_written(&wb);
+ data += uncompressed_header_size;
vpx_clear_system_state();
- first_part_size = write_compressed_header(cpi, data);
- data += first_part_size;
+ // Write the compressed header
+ compressed_header_size = write_compressed_header(cpi, data);
+ data += compressed_header_size;
- data_sz = encode_tiles(cpi, data, &max_tile);
- if (max_tile > 0) {
- int mag;
- unsigned int mask;
+ // Write the encoded tile data
+ data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size);
- // Choose the (tile size) magnitude
- for (mag = 0, mask = 0xff; mag < 4; mag++) {
- if (max_tile <= mask)
- break;
- mask <<= 8;
- mask |= 0xff;
- }
- assert(n_log2_tiles > 0);
- vpx_wb_write_literal(&saved_wb, mag, 2);
- if (mag < 3)
- data_sz = remux_tiles(data, (int)data_sz, 1 << n_log2_tiles, mag);
- } else {
- assert(n_log2_tiles == 0);
+ if (have_tiles) {
+ data_size = remux_tiles(cm, data, data_size,
+ max_tile_size, max_tile_col_size,
+ &tile_size_bytes, &tile_col_size_bytes);
}
- data += data_sz;
- // TODO(jbb): Figure out what to do if first_part_size > 16 bits.
- vpx_wb_write_literal(&saved_wb, (int)first_part_size, 16);
+ data += data_size;
+
+ // Now fill in the gaps in the uncompressed header.
+ if (have_tiles) {
+#if CONFIG_EXT_TILE
+ assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4);
+ vpx_wb_write_literal(&saved_wb, tile_col_size_bytes - 1, 2);
+#endif // CONFIG_EXT_TILE
+ assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
+ vpx_wb_write_literal(&saved_wb, tile_size_bytes - 1, 2);
+ }
+ // TODO(jbb): Figure out what to do if compressed_header_size > 16 bits.
+ assert(compressed_header_size <= 0xffff);
+ vpx_wb_write_literal(&saved_wb, compressed_header_size, 16);
- *size = data - dest;
+ *size = data - dst;
}
#include "vp10/encoder/encoder.h"
-void vp10_encode_token_init();
void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size);
void vp10_encode_token_init();
int mi_row,
TOKENEXTRA **tp) {
VP10_COMMON *const cm = &cpi->common;
- TileInfo *const tile_info = &tile_data->tile_info;
+ const TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
SPEED_FEATURES *const sf = &cpi->sf;
int mi_col;
+ // Initialize the left context for the new SB row
vp10_zero_left_context(xd);
// Code each SB in the row
MACROBLOCK *const x = &cpi->td.mb;
VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
// Copy data over into macro block data structures.
vp10_setup_src_planes(x, cpi->Source, 0, 0);
vp10_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
-
- vp10_zero_above_context(cm, 0, aligned_mi_cols);
}
static int check_dual_ref_flags(VP10_COMP *cpi) {
void vp10_init_tile_data(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int tile_rows = 1 << cm->log2_tile_rows;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
int tile_col, tile_row;
TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
- int tile_tok = 0;
+ unsigned int tile_tok = 0;
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL)
for (tile_row = 0; tile_row < tile_rows; ++tile_row)
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileDataEnc *tile_data =
+ TileDataEnc *const tile_data =
&cpi->tile_data[tile_row * tile_cols + tile_col];
int i, j;
for (i = 0; i < BLOCK_SIZES; ++i) {
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileInfo *tile_info =
+ TileInfo *const tile_info =
&cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
vp10_tile_init(tile_info, cm, tile_row, tile_col);
void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td,
int tile_row, int tile_col) {
VP10_COMMON *const cm = &cpi->common;
- const int tile_cols = 1 << cm->log2_tile_cols;
- TileDataEnc *this_tile =
- &cpi->tile_data[tile_row * tile_cols + tile_col];
+ TileDataEnc *const this_tile =
+ &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
const TileInfo * const tile_info = &this_tile->tile_info;
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
int mi_row;
+ vp10_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end);
+
// Set up pointers to per thread motion search counters.
td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
mi_row += MI_BLOCK_SIZE) {
encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
}
+
cpi->tok_count[tile_row][tile_col] =
(unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
- assert(tok - cpi->tile_tok[tile_row][tile_col] <=
- allocated_tokens(*tile_info));
+ assert(cpi->tok_count[tile_row][tile_col] <= allocated_tokens(*tile_info));
}
static void encode_tiles(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int tile_rows = 1 << cm->log2_tile_rows;
int tile_col, tile_row;
vp10_init_tile_data(cpi);
- for (tile_row = 0; tile_row < tile_rows; ++tile_row)
- for (tile_col = 0; tile_col < tile_cols; ++tile_col)
+ for (tile_row = 0; tile_row < cm->tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < cm->tile_cols; ++tile_col)
vp10_encode_tile(cpi, &cpi->td, tile_row, tile_col);
}
#endif
// If allowed, encoding tiles in parallel with one thread handling one tile.
- if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
+ // TODO(geza.lore): The multi-threaded encoder is not safe with more than
+ // 1 tile rows, as it uses the single above_context et al arrays from
+ // cpi->common
+ if (VPXMIN(cpi->oxcf.max_threads, cm->tile_cols) > 1 && cm->tile_rows == 1)
vp10_encode_tiles_mt(cpi);
else
encode_tiles(cpi);
static void set_tile_limits(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
+#if CONFIG_EXT_TILE
+ cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64) << MI_BLOCK_SIZE_LOG2;
+ cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64) << MI_BLOCK_SIZE_LOG2;
+ cm->tile_width = VPXMIN(cm->tile_width, cm->mi_cols);
+ cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows);
+
+ // Get the number of tiles
+ cm->tile_cols = 1;
+ while (cm->tile_cols * cm->tile_width < cm->mi_cols)
+ ++cm->tile_cols;
+
+ cm->tile_rows = 1;
+ while (cm->tile_rows * cm->tile_height < cm->mi_rows)
+ ++cm->tile_rows;
+#else
int min_log2_tile_cols, max_log2_tile_cols;
vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns,
min_log2_tile_cols, max_log2_tile_cols);
cm->log2_tile_rows = cpi->oxcf.tile_rows;
+
+ cm->tile_cols = 1 << cm->log2_tile_cols;
+ cm->tile_rows = 1 << cm->log2_tile_rows;
+
+ cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols);
+ cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows);
+ // round to integer multiples of 8
+ cm->tile_width = mi_cols_aligned_to_sb(cm->tile_width);
+ cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height);
+#endif // CONFIG_EXT_TILE
}
static void update_frame_size(VP10_COMP *cpi) {
// to recode.
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
save_coding_context(cpi);
+
vp10_pack_bitstream(cpi, dest, size);
+
rc->projected_frame_size = (int)(*size) << 3;
restore_coding_context(cpi);
} SUBFRAME_STATS;
#endif // CONFIG_ENTROPY
+typedef struct TileBufferEnc {
+ uint8_t *data;
+ size_t size;
+} TileBufferEnc;
+
typedef struct VP10_COMP {
QUANTS quants;
ThreadData td;
EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES];
int upsampled_ref_idx[MAX_REF_FRAMES];
- TileDataEnc *tile_data;
- int allocated_tiles; // Keep track of memory allocated for tiles.
-
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
YV12_BUFFER_CONFIG last_frame_db;
#endif // CONFIG_LOOP_RESTORATION
- TOKENEXTRA *tile_tok[4][1 << 6];
- unsigned int tok_count[4][1 << 6];
-
// Ambient reconstruction err target for force key frames
int64_t ambient_err;
YV12_BUFFER_CONFIG alt_ref_buffer;
-
#if CONFIG_INTERNAL_STATS
unsigned int mode_chosen_counts[MAX_MODES];
int multi_arf_allowed;
int multi_arf_enabled;
int multi_arf_last_grp_enabled;
+
+ TileDataEnc *tile_data;
+ int allocated_tiles; // Keep track of memory allocated for tiles.
+
+ TOKENEXTRA *tile_tok[MAX_TILE_ROWS][MAX_TILE_COLS];
+ unsigned int tok_count[MAX_TILE_ROWS][MAX_TILE_COLS];
+
+ TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
+
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_DENOISER denoiser;
#endif
buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL;
}
-static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
+static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols) {
// TODO(JBB): double check we can't exceed this token count if we have a
// 32x32 transform crossing a boundary at a multiple of 16.
// mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
// Get the allocated token size for a tile. It does the same calculation as in
// the frame token allocation.
-static INLINE int allocated_tokens(TileInfo tile) {
+static INLINE unsigned int allocated_tokens(TileInfo tile) {
int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 1) >> 1;
int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 1) >> 1;
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
VP10_COMP *const cpi = thread_data->cpi;
const VP10_COMMON *const cm = &cpi->common;
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int tile_rows = 1 << cm->log2_tile_rows;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
int t;
(void) unused;
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
- const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_cols = cm->tile_cols;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
int i;
// Only run once to create threads and allocate thread data.
if (cpi->num_workers == 0) {
- int allocated_workers = num_workers;
-
CHECK_MEM_ERROR(cm, cpi->workers,
- vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
+ vpx_malloc(num_workers * sizeof(*cpi->workers)));
CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
- vpx_calloc(allocated_workers,
+ vpx_calloc(num_workers,
sizeof(*cpi->tile_thr_data)));
- for (i = 0; i < allocated_workers; i++) {
+ for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
- EncWorkerData *thread_data = &cpi->tile_thr_data[i];
+ EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
++cpi->num_workers;
winterface->init(worker);
- if (i < allocated_workers - 1) {
- thread_data->cpi = cpi;
+ thread_data->cpi = cpi;
+
+ if (i < num_workers - 1) {
// Allocate thread data.
CHECK_MEM_ERROR(cm, thread_data->td,
"Tile encoder thread creation failed");
} else {
// Main thread acts as a worker and uses the thread data in cpi.
- thread_data->cpi = cpi;
thread_data->td = &cpi->td;
}
lf->filter_level = vp10_search_filter_level(
sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL);
}
+
+#if CONFIG_EXT_TILE
+ // TODO(any): 0 loopfilter level is only necessary if individual tile
+ // decoding is required. We need to communicate this requirement to this
+ // code and force loop filter level 0 only if required.
+ lf->filter_level = 0;
+#endif // CONFIG_EXT_TILE
}
#endif // !CONFIG_LOOP_RESTORATION
int no_pred_cost;
int t_pred_cost = INT_MAX;
- int i, tile_col, mi_row, mi_col;
+ int i, tile_col, tile_row, mi_row, mi_col;
unsigned (*temporal_predictor_count)[2] = cm->counts.seg.pred;
unsigned *no_pred_segcounts = cm->counts.seg.tree_total;
// First of all generate stats regarding how well the last segment map
// predicts this one
- for (tile_col = 0; tile_col < 1 << cm->log2_tile_cols; tile_col++) {
- TileInfo tile;
- MODE_INFO **mi_ptr;
- vp10_tile_init(&tile, cm, 0, tile_col);
-
- mi_ptr = cm->mi_grid_visible + tile.mi_col_start;
- for (mi_row = 0; mi_row < cm->mi_rows;
- mi_row += 8, mi_ptr += 8 * cm->mi_stride) {
- MODE_INFO **mi = mi_ptr;
- for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
- mi_col += 8, mi += 8)
- count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts,
- mi_row, mi_col, BLOCK_64X64);
+ for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
+ TileInfo tile_info;
+ vp10_tile_set_row(&tile_info, cm, tile_row);
+ for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
+ MODE_INFO **mi_ptr;
+ vp10_tile_set_col(&tile_info, cm, tile_col);
+ mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
+ tile_info.mi_col_start;
+ for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
+ mi_row += 8, mi_ptr += 8 * cm->mi_stride) {
+ MODE_INFO **mi = mi_ptr;
+ for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
+ mi_col += 8, mi += 8) {
+ count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts,
+ mi_row, mi_col, BLOCK_64X64);
+ }
+ }
}
}
+
// Work out probability tree for coding segments without prediction
// and the cost.
calc_segtree_probs(no_pred_segcounts, no_pred_tree, segp->tree_probs);
0, // noise_sensitivity
0, // sharpness
0, // static_thresh
- 6, // tile_columns
+#if CONFIG_EXT_TILE
+ 64, // tile_columns
+ 64, // tile_rows
+#else
+ 0, // tile_columns
0, // tile_rows
+#endif // CONFIG_EXT_TILE
7, // arnr_max_frames
5, // arnr_strength
0, // min_gf_interval; 0 -> default decision
RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
+#if CONFIG_EXT_TILE
+  // TODO(any): Warning. If CONFIG_EXT_TILE is true, tile_columns really
+  // means tile_width, and tile_rows really means tile_height. The interface
+  // should be sanitized.
+ RANGE_CHECK(extra_cfg, tile_columns, 1, 64);
+ RANGE_CHECK(extra_cfg, tile_rows, 1, 64);
+#else
RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
RANGE_CHECK(extra_cfg, tile_rows, 0, 2);
+#endif // CONFIG_EXT_TILE
RANGE_CHECK_HI(extra_cfg, sharpness, 7);
RANGE_CHECK(extra_cfg, arnr_max_frames, 0, 15);
RANGE_CHECK_HI(extra_cfg, arnr_strength, 6);
#include "vp10/common/alloccommon.h"
#include "vp10/common/frame_buffers.h"
+#include "vp10/common/enums.h"
#include "vp10/decoder/decoder.h"
#include "vp10/decoder/decodeframe.h"
frame_worker_data->pbi->decrypt_cb = ctx->decrypt_cb;
frame_worker_data->pbi->decrypt_state = ctx->decrypt_state;
+#if CONFIG_EXT_TILE
+ frame_worker_data->pbi->dec_tile_row = ctx->cfg.tile_row;
+ frame_worker_data->pbi->dec_tile_col = ctx->cfg.tile_col;
+#endif // CONFIG_EXT_TILE
+
worker->had_error = 0;
winterface->execute(worker);
if (ctx->need_resync)
return NULL;
yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv);
+
+
+#if CONFIG_EXT_TILE
+ if (frame_worker_data->pbi->dec_tile_row >= 0) {
+ const int tile_row = VPXMIN(frame_worker_data->pbi->dec_tile_row,
+ cm->tile_rows - 1);
+ const int mi_row = tile_row * cm->tile_height;
+ const int ssy = ctx->img.y_chroma_shift;
+ int plane;
+ ctx->img.planes[0] += mi_row * MI_SIZE * ctx->img.stride[0];
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+ ctx->img.planes[plane] += mi_row * (MI_SIZE >> ssy) *
+ ctx->img.stride[plane];
+ }
+ ctx->img.d_h = VPXMIN(cm->tile_height, cm->mi_rows - mi_row) *
+ MI_SIZE;
+ }
+
+ if (frame_worker_data->pbi->dec_tile_col >= 0) {
+ const int tile_col = VPXMIN(frame_worker_data->pbi->dec_tile_col,
+ cm->tile_cols - 1);
+ const int mi_col = tile_col * cm->tile_width;
+ const int ssx = ctx->img.x_chroma_shift;
+ int plane;
+ ctx->img.planes[0] += mi_col * MI_SIZE;
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+ ctx->img.planes[plane] += mi_col * (MI_SIZE >> ssx);
+ }
+ ctx->img.d_w = VPXMIN(cm->tile_width, cm->mi_cols - mi_col) *
+ MI_SIZE;
+ }
+#endif // CONFIG_EXT_TILE
+
ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
img = &ctx->img;
return img;
unsigned int threads; /**< Maximum number of threads to use, default 1 */
unsigned int w; /**< Width */
unsigned int h; /**< Height */
+ int tile_row; /**< The index of row tile to be decoded.
+ Value -1 means to decode all row tiles. */
+  int tile_col;           /**< The index of column tile to be decoded.
+                               Value -1 means to decode all column tiles. */
} vpx_codec_dec_cfg_t; /**< alias for struct vpx_codec_dec_cfg */
static const arg_def_t outbitdeptharg = ARG_DEF(
NULL, "output-bit-depth", 1, "Output bit-depth for decoded frames");
#endif
+#if CONFIG_EXT_TILE
+static const arg_def_t tiler = ARG_DEF(
+ NULL, "tile-row", 1, "Row index of tile to decode "
+ "(-1 for all rows)");
+static const arg_def_t tilec = ARG_DEF(
+ NULL, "tile-column", 1, "Column index of tile to decode "
+ "(-1 for all columns)");
+#endif // CONFIG_EXT_TILE
static const arg_def_t *all_args[] = {
&codecarg, &use_yv12, &use_i420, &flipuvarg, &rawvideo, &noblitarg,
#if CONFIG_VP9_HIGHBITDEPTH
&outbitdeptharg,
#endif
+#if CONFIG_EXT_TILE
+ &tiler, &tilec,
+#endif // CONFIG_EXT_TILE
NULL
};
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int output_bit_depth = 0;
#endif
+#if CONFIG_EXT_TILE
+ int tile_row = -1;
+ int tile_col = -1;
+#endif // CONFIG_EXT_TILE
#if CONFIG_VP8_DECODER
vp8_postproc_cfg_t vp8_pp_cfg = {0};
int vp8_dbg_color_ref_frame = 0;
output_bit_depth = arg_parse_uint(&arg);
}
#endif
+#if CONFIG_EXT_TILE
+ else if (arg_match(&arg, &tiler, argi))
+ tile_row = arg_parse_int(&arg);
+ else if (arg_match(&arg, &tilec, argi))
+ tile_col = arg_parse_int(&arg);
+#endif // CONFIG_EXT_TILE
#if CONFIG_VP8_DECODER
else if (arg_match(&arg, &addnoise_level, argi)) {
postproc = 1;
if (!interface)
interface = get_vpx_decoder_by_index(0);
+#if CONFIG_EXT_TILE
+ cfg.tile_row = tile_row;
+ cfg.tile_col = tile_col;
+#endif // CONFIG_EXT_TILE
+
dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) |
(ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0) |
(frame_parallel ? VPX_CODEC_USE_FRAME_THREADING : 0);
}
#endif
+#if CONFIG_EXT_TILE
+ vpx_input_ctx.width = img->d_w;
+ vpx_input_ctx.height = img->d_h;
+#endif // CONFIG_EXT_TILE
+
if (single_file) {
if (use_y4m) {
char buf[Y4M_BUFFER_SIZE] = {0};
#if CONFIG_DECODERS
if (global->test_decode != TEST_DECODE_OFF) {
const VpxInterface *decoder = get_vpx_decoder_by_name(global->codec->name);
- vpx_codec_dec_init(&stream->decoder, decoder->codec_interface(), NULL, 0);
+ vpx_codec_dec_cfg_t cfg = { 0, 0, 0, -1, -1 };
+ vpx_codec_dec_init(&stream->decoder, decoder->codec_interface(), &cfg, 0);
}
#endif
}