From: Urvang Joshi Date: Thu, 4 Aug 2016 18:00:17 +0000 (-0700) Subject: Palette code cleanup: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d00002084064c686556afe498eee03c72bc02faa;p=libvpx Palette code cleanup: - Avoid some memcpy()s - Remove indices array - Make pre_indices array local - Avoid rounding twice - Other small simplifications Change-Id: Iac3236daaad04f21f54054cdd9504de13b942a07 --- diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h index 538beaa09..7da76cccd 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h @@ -68,8 +68,6 @@ typedef struct { typedef struct { uint8_t best_palette_color_map[MAX_SB_SQUARE]; float kmeans_data_buf[2 * MAX_SB_SQUARE]; - uint8_t kmeans_indices_buf[MAX_SB_SQUARE]; - uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE]; } PALETTE_BUFFER; typedef struct macroblock MACROBLOCK; diff --git a/vp10/encoder/palette.c b/vp10/encoder/palette.c index cbc3582e8..94e0c8755 100644 --- a/vp10/encoder/palette.c +++ b/vp10/encoder/palette.c @@ -13,10 +13,9 @@ static float calc_dist(const float *p1, const float *p2, int dim) { float dist = 0; - int i = 0; - + int i; for (i = 0; i < dim; ++i) { - float diff = p1[i] - roundf(p2[i]); + const float diff = p1[i] - roundf(p2[i]); dist += diff * diff; } return dist; @@ -25,13 +24,12 @@ static float calc_dist(const float *p1, const float *p2, int dim) { void vp10_calc_indices(const float *data, const float *centroids, uint8_t *indices, int n, int k, int dim) { int i, j; - float min_dist, this_dist; - for (i = 0; i < n; ++i) { - min_dist = calc_dist(data + i * dim, centroids, dim); + float min_dist = calc_dist(data + i * dim, centroids, dim); indices[i] = 0; for (j = 1; j < k; ++j) { - this_dist = calc_dist(data + i * dim, centroids + j * dim, dim); + const float this_dist = + calc_dist(data + i * dim, centroids + j * dim, dim); if (this_dist < min_dist) { min_dist = this_dist; indices[i] = j; @@ -90,17 +88,21 @@ static float calc_total_dist(const float *data, const float *centroids, return dist; } -int vp10_k_means(const float *data, float *centroids, uint8_t *indices, - uint8_t *pre_indices, int n, int k, int dim, int max_itr) { - int i = 0; - float pre_dist, this_dist; +void vp10_k_means(const float *data, float *centroids, uint8_t *indices, int n, + int k, int dim, int max_itr) { + int i; + float this_dist; float pre_centroids[2 * PALETTE_MAX_SIZE]; + uint8_t pre_indices[MAX_SB_SQUARE]; vp10_calc_indices(data, centroids, indices, n, k, dim); - pre_dist = calc_total_dist(data, centroids, indices, n, k, dim); - memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim); - memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n); - while (i < max_itr) { + this_dist = calc_total_dist(data, centroids, indices, n, k, dim); + + for (i = 0; i < max_itr; ++i) { + const float pre_dist = this_dist; + memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim); + memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n); + calc_centroids(data, centroids, indices, n, k, dim); vp10_calc_indices(data, centroids, indices, n, k, dim); this_dist = calc_total_dist(data, centroids, indices, n, k, dim); @@ -112,14 +114,7 @@ int vp10_k_means(const float *data, float *centroids, uint8_t *indices, } if (!memcmp(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim)) break; - - memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim); - memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n); - pre_dist = this_dist; - ++i; } - - return i; } void vp10_insertion_sort(float *data, int n) { diff --git a/vp10/encoder/palette.h b/vp10/encoder/palette.h index 40d9ef915..3ec0f73bc 100644 --- a/vp10/encoder/palette.h +++ b/vp10/encoder/palette.h @@ -20,8 +20,8 @@ extern "C" { void vp10_insertion_sort(float *data, int n); void vp10_calc_indices(const float *data, const float *centroids, uint8_t *indices, int n, int k, int dim); -int vp10_k_means(const float *data, float *centroids, uint8_t *indices, - uint8_t *pre_indices, int n, int k, int dim, int max_itr); +void vp10_k_means(const float *data, float *centroids, uint8_t *indices, int n, + int k, int dim, int max_itr); int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols); #if CONFIG_VP9_HIGHBITDEPTH int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows, diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index ffb88b18e..35165939a 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -1859,8 +1859,6 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, int color_ctx, color_idx = 0; int color_order[PALETTE_MAX_SIZE]; float *const data = x->palette_buffer->kmeans_data_buf; - uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; - uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf; float centroids[PALETTE_MAX_SIZE]; uint8_t *const color_map = xd->plane[0].color_index_map; float lb, ub, val; @@ -1914,8 +1912,7 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, n >= 2; --n) { for (i = 0; i < n; ++i) centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2; - vp10_k_means(data, centroids, indices, pre_indices, rows * cols, - n, 1, max_itr); + vp10_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr); vp10_insertion_sort(centroids, n); for (i = 0; i < n; ++i) centroids[i] = roundf(centroids[i]); @@ -1932,16 +1929,15 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) for (i = 0; i < k; ++i) - pmi->palette_colors[i] = clip_pixel_highbd((int)lroundf(centroids[i]), - cpi->common.bit_depth); + pmi->palette_colors[i] = + clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth); else #endif // CONFIG_VP9_HIGHBITDEPTH for (i = 0; i < k; ++i) - pmi->palette_colors[i] = clip_pixel((int)lroundf(centroids[i])); + pmi->palette_colors[i] = clip_pixel((int)centroids[i]); pmi->palette_size[0] = k; - vp10_calc_indices(data, centroids, indices, rows * cols, k, 1); - memcpy(color_map, indices, rows * cols * sizeof(*color_map)); + vp10_calc_indices(data, centroids, color_map, rows * cols, k, 1); super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, bsize, *best_rd); @@ -3742,8 +3738,6 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, float lb_u, ub_u, val_u; float lb_v, ub_v, val_v; float *const data = x->palette_buffer->kmeans_data_buf; - uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; - uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf; float centroids[2 * PALETTE_MAX_SIZE]; uint8_t *const color_map = xd->plane[1].color_index_map; PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; @@ -3805,8 +3799,7 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;; } - r = vp10_k_means(data, centroids, indices, pre_indices, rows * cols, n, - 2, max_itr); + vp10_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr); pmi->palette_size[1] = n; for (i = 1; i < 3; ++i) { for (j = 0; j < n; ++j) { @@ -3821,7 +3814,6 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, clip_pixel((int)lroundf(centroids[j * 2 + i - 1])); } } - memcpy(color_map, indices, rows * cols * sizeof(*color_map)); super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, *best_rd); @@ -3853,7 +3845,7 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, if (this_rd < *best_rd) { *best_rd = this_rd; *palette_mode_info = *pmi; - memcpy(best_palette_color_map, xd->plane[1].color_index_map, + memcpy(best_palette_color_map, color_map, rows * cols * sizeof(best_palette_color_map[0])); *mode_selected = DC_PRED; *rate = this_rate; @@ -8285,7 +8277,6 @@ static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) { const uint8_t *const src_u = x->plane[1].src.buf; const uint8_t *const src_v = x->plane[2].src.buf; float *const data = x->palette_buffer->kmeans_data_buf; - uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; float centroids[2 * PALETTE_MAX_SIZE]; uint8_t *const color_map = xd->plane[1].color_index_map; int r, c; @@ -8321,9 +8312,8 @@ static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) { } } - vp10_calc_indices(data, centroids, indices, rows * cols, + vp10_calc_indices(data, centroids, color_map, rows * cols, pmi->palette_size[1], 2); - memcpy(color_map, indices, rows * cols * sizeof(*color_map)); } #if CONFIG_EXT_INTRA