From b7559258a45e4ad67cc492ca02e3c45319bd843e Mon Sep 17 00:00:00 2001
From: Dmitry Kovalev <dkovalev@google.com>
Date: Mon, 8 Jul 2013 14:54:04 -0700
Subject: [PATCH] Using mi_cols instead of mb_cols.

Eliminating usage of mb-units, switching to mi-units. Adding
ALIGN_POWER_OF_TWO macro.

Change-Id: I2491c969f713207c062011878b57e4e531818607
---
 vp9/common/vp9_alloccommon.c  | 11 +++++------
 vp9/common/vp9_common.h       | 13 ++++---------
 vp9/common/vp9_enums.h        |  8 +++++---
 vp9/common/vp9_onyxc_int.h    |  4 ++--
 vp9/common/vp9_tile_common.c  |  8 ++++++--
 vp9/decoder/vp9_decodframe.c  | 11 ++++++-----
 vp9/encoder/vp9_bitstream.c   |  4 ++--
 vp9/encoder/vp9_encodeframe.c | 14 +++++++-------
 vp9/vp9_iface_common.h        |  2 +-
 9 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 566fd5059..96b27bf7c 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -95,9 +95,8 @@ static void setup_mi(VP9_COMMON *cm) {
 int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
   int i, mi_cols;
 
-  // Our internal buffers are always multiples of 16
-  const int aligned_width = multiple8(width);
-  const int aligned_height = multiple8(height);
+  const int aligned_width = ALIGN_POWER_OF_TWO(width, LOG2_MI_SIZE);
+  const int aligned_height = ALIGN_POWER_OF_TWO(height, LOG2_MI_SIZE);
   const int ss_x = oci->subsampling_x;
   const int ss_y = oci->subsampling_y;
   int mi_size;
@@ -147,7 +146,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
 
   // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
   // information is exposed at this level
-  mi_cols = mi_cols_aligned_to_sb(oci);
+  mi_cols = mi_cols_aligned_to_sb(oci->mi_cols);
 
   // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
   // block where mi unit size is 8x8.
@@ -198,8 +197,8 @@ void vp9_initialize_common() {
 }
 
 void vp9_update_frame_size(VP9_COMMON *cm) {
-  const int aligned_width = multiple8(cm->width);
-  const int aligned_height = multiple8(cm->height);
+  const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, LOG2_MI_SIZE);
+  const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, LOG2_MI_SIZE);
 
   set_mb_mi(cm, aligned_width, aligned_height);
   setup_mi(cm);
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index a1dca1bf4..179690652 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -22,12 +22,11 @@
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
 
-#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
+#define ROUND_POWER_OF_TWO(value, n) \
+    (((value) + (1 << ((n) - 1))) >> (n))
 
-/* If we don't want to use ROUND_POWER_OF_TWO macro
-static INLINE int16_t round_power_of_two(int16_t value, int n) {
-  return (value + (1 << (n - 1))) >> n;
-}*/
+#define ALIGN_POWER_OF_TWO(value, n) \
+    (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
 
 // Only need this for fixed-size arrays, for structs just assign.
 #define vp9_copy(dest, src) {            \
@@ -56,10 +55,6 @@ static INLINE double fclamp(double value, double low, double high) {
   return value < low ? low : (value > high ? high : value);
 }
 
-static INLINE int multiple8(int value) {
-  return (value + 7) & ~7;
-}
-
 static int get_unsigned_bits(unsigned int num_values) {
   int cat = 0;
   if (num_values <= 1)
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 89a925397..dfd6a4a3f 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -14,10 +14,12 @@
 #include "./vpx_config.h"
 
 #define LOG2_MI_SIZE 3
+#define LOG2_MI_BLOCK_SIZE (6 - LOG2_MI_SIZE)  // 64 = 2^6
 
-#define MI_SIZE (1 << LOG2_MI_SIZE)
-#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1)
-#define MI_BLOCK_SIZE (64 / MI_SIZE)
+#define MI_SIZE (1 << LOG2_MI_SIZE)  // pixels per mi-unit
+#define MI_BLOCK_SIZE (1 << LOG2_MI_BLOCK_SIZE)  // mi-units per max block
+
+#define MI_MASK (MI_BLOCK_SIZE - 1)
 
 typedef enum BLOCK_SIZE_TYPE {
   BLOCK_SIZE_AB4X4,
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index abbf73f19..f31d5a21c 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -301,8 +301,8 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
   buf[new_idx]++;
 }
 
-static int mi_cols_aligned_to_sb(VP9_COMMON *cm) {
-  return 2 * ((cm->mb_cols + 3) & ~3);
+static int mi_cols_aligned_to_sb(int n_mis) {
+  return ALIGN_POWER_OF_TWO(n_mis, LOG2_MI_BLOCK_SIZE);
 }
 
 static INLINE void set_partition_seg_context(VP9_COMMON *cm,
diff --git a/vp9/common/vp9_tile_common.c b/vp9/common/vp9_tile_common.c
index 95296ad6f..208cbb957 100644
--- a/vp9/common/vp9_tile_common.c
+++ b/vp9/common/vp9_tile_common.c
@@ -15,10 +15,14 @@
 #define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)
 #define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)
 
+static int to_sbs(n_mis) {
+  return mi_cols_aligned_to_sb(n_mis) >> LOG2_MI_BLOCK_SIZE;
+}
+
 static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
                                  int *max_tile_off, int tile_idx,
                                  int log2_n_tiles, int n_mis) {
-  const int n_sbs = (n_mis + 7) >> 3;
+  const int n_sbs = to_sbs(n_mis);
   const int sb_off1 =  (tile_idx      * n_sbs) >> log2_n_tiles;
   const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles;
 
@@ -43,7 +47,7 @@ void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx) {
 
 void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr,
                          int *delta_log2_n_tiles) {
-  const int sb_cols = (cm->mb_cols + 3) >> 2;
+  const int sb_cols = to_sbs(cm->mi_cols);
   int min_log2_n_tiles, max_log2_n_tiles;
 
   for (max_log2_n_tiles = 0;
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 5ea75973d..c0c74f6cc 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -694,16 +694,17 @@ static void decode_tiles(VP9D_COMP *pbi,
   VP9_COMMON *const pc = &pbi->common;
 
   const uint8_t *data_ptr = data + first_partition_size;
-  const uint8_t* const data_end = pbi->source + pbi->source_sz;
+  const uint8_t *const data_end = pbi->source + pbi->source_sz;
+  const int aligned_mi_cols = mi_cols_aligned_to_sb(pc->mi_cols);
   int tile_row, tile_col;
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *
-                                      MAX_MB_PLANE * mi_cols_aligned_to_sb(pc));
+  vpx_memset(pc->above_context[0], 0,
+             sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * aligned_mi_cols);
 
-  vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-                                       mi_cols_aligned_to_sb(pc));
+  vpx_memset(pc->above_seg_context, 0,
+             sizeof(PARTITION_CONTEXT) * aligned_mi_cols);
 
   if (pbi->oxcf.inv_tile_order) {
     const int n_cols = pc->tile_columns;
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 26456e021..530c9b107 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1476,8 +1476,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) {
     unsigned char *data_ptr = cx_data + header_bc.pos;
     TOKENEXTRA *tok[4][1 << 6], *tok_end;
 
-    vpx_memset(cpi->common.above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
-               mi_cols_aligned_to_sb(&cpi->common));
+    vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
+               mi_cols_aligned_to_sb(pc->mi_cols));
     tok[0][0] = cpi->tok;
     for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
       if (tile_row) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 1a42961b5..3d3079a9f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1690,9 +1690,10 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
 }
 
 static void init_encode_frame_mb_context(VP9_COMP *cpi) {
-  MACROBLOCK * const x = &cpi->mb;
-  VP9_COMMON * const cm = &cpi->common;
-  MACROBLOCKD * const xd = &x->e_mbd;
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
 
   x->act_zbin_adj = 0;
   cpi->seg0_idx = 0;
@@ -1735,11 +1736,10 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(
-      cm->above_context[0], 0,
-      sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * mi_cols_aligned_to_sb(cm));
+  vpx_memset(cm->above_context[0], 0,
+             sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * aligned_mi_cols);
   vpx_memset(cm->above_seg_context, 0,
-             sizeof(PARTITION_CONTEXT) * mi_cols_aligned_to_sb(cm));
+             sizeof(PARTITION_CONTEXT) * aligned_mi_cols);
 }
 
 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index dc41d77d1..0c1f37368 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -29,7 +29,7 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG  *yv12,
     img->fmt = VPX_IMG_FMT_I420;
   }
   img->w = yv12->y_stride;
-  img->h = multiple8(yv12->y_height + 2 * VP9BORDERINPIXELS);
+  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9BORDERINPIXELS, 3);
   img->d_w = yv12->y_crop_width;
   img->d_h = yv12->y_crop_height;
   img->x_chroma_shift = yv12->uv_width < yv12->y_width;
-- 
2.40.0