From e1d3c36525fd5aadb24610b6cbf2b8219d435933 Mon Sep 17 00:00:00 2001
From: Deb Mukherjee
Date: Sat, 20 Sep 2014 02:25:13 -0700
Subject: [PATCH] Adds high bit-depth frame resize functions

Change-Id: I35b015a759325d72d0da427c61a09f19f8e69697
---
Reviewer notes (below the '---' marker, so not part of the commit message):
  * highbd_resize_multistep(): the scratch-buffer test now checks the
    caller-supplied `buf` instead of the local `tmpbuf`, which was always
    NULL at that point and forced a malloc()/free() for every row and column
    even though vp9_highbd_resize_plane() already provides scratch space.
  * highbd_down2_symeven()/highbd_down2_symodd(): `filter` is now a plain
    local pointer, matching the 8-bit versions changed by this same patch.
  * Both amendments replace lines in place, so hunk sizes and the diffstat
    are unchanged; the blob ids on the "index" lines predate them.

 vp9/encoder/vp9_encoder.c |  51 +++++-
 vp9/encoder/vp9_resize.c  | 348 +++++++++++++++++++++++++++++++++++++-
 vp9/encoder/vp9_resize.h  |  56 ++++++
 3 files changed, 450 insertions(+), 5 deletions(-)

diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 95d8c9f23..bfd35d569 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2167,8 +2167,14 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
 }
 #endif
 
+#if CONFIG_VP9_HIGHBITDEPTH
+static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+                                                YV12_BUFFER_CONFIG *dst,
+                                                int bd) {
+#else
 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
                                                 YV12_BUFFER_CONFIG *dst) {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
   // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
   int i;
   const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
@@ -2184,15 +2190,31 @@ static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
   const int dst_heights[3] = {dst->y_crop_height, dst->uv_crop_height,
                               dst->uv_crop_height};
 
-  for (i = 0; i < MAX_MB_PLANE; ++i)
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
+      vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
+                              src_strides[i], dsts[i], dst_heights[i],
+                              dst_widths[i], dst_strides[i], bd);
+    } else {
+      vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
+                       dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
+    }
+#else
     vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
                      dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
-
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  }
   vp9_extend_frame_borders(dst);
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+                                   YV12_BUFFER_CONFIG *dst, int bd) {
+#else
 static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
                                    YV12_BUFFER_CONFIG *dst) {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
   const int src_w = src->y_crop_width;
   const int src_h = src->y_crop_height;
   const int dst_w = dst->y_crop_width;
@@ -2216,10 +2238,24 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
                                    src_stride + (x / factor) * src_w / dst_w;
         uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
 
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
+          vp9_high_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+                             kernel[x_q4 & 0xf], 16 * src_w / dst_w,
+                             kernel[y_q4 & 0xf], 16 * src_h / dst_h,
+                             16 / factor, 16 / factor, bd);
+        } else {
+          vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+                        kernel[x_q4 & 0xf], 16 * src_w / dst_w,
+                        kernel[y_q4 & 0xf], 16 * src_h / dst_h,
+                        16 / factor, 16 / factor);
+        }
+#else
         vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
                       kernel[x_q4 & 0xf], 16 * src_w / dst_w,
                       kernel[y_q4 & 0xf], 16 * src_h / dst_h,
                       16 / factor, 16 / factor);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       }
     }
   }
@@ -2388,9 +2424,14 @@ void vp9_scale_references(VP9_COMP *cpi) {
                                cm->subsampling_x, cm->subsampling_y,
 #if CONFIG_VP9_HIGHBITDEPTH
                                cm->use_highbitdepth,
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
                                VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+#if CONFIG_VP9_HIGHBITDEPTH
+      scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf,
+                             (int)cm->bit_depth);
+#else
       scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
     } else {
       cpi->scaled_ref_idx[ref_frame - 1] = idx;
@@ -2766,7 +2807,11 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
                                           YV12_BUFFER_CONFIG *scaled) {
   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
       cm->mi_rows * MI_SIZE != unscaled->y_height) {
+#if CONFIG_VP9_HIGHBITDEPTH
+    scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
+#else
     scale_and_extend_frame_nonnormative(unscaled, scaled);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     return scaled;
   } else {
     return unscaled;
diff --git a/vp9/encoder/vp9_resize.c b/vp9/encoder/vp9_resize.c
index 4e6efaeb9..4a8a52156 100644
--- a/vp9/encoder/vp9_resize.c
+++ b/vp9/encoder/vp9_resize.c
@@ -312,7 +312,7 @@ static void interpolate(const uint8_t *const input, int inlength,
 static void down2_symeven(const uint8_t *const input, int length,
                           uint8_t *output) {
   // Actual filter len = 2 * filter_len_half.
-  static const int16_t *filter = vp9_down2_symeven_half_filter;
+  const int16_t *filter = vp9_down2_symeven_half_filter;
   const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2;
   int i, j;
   uint8_t *optr = output;
@@ -368,7 +368,7 @@ static void down2_symeven(const uint8_t *const input, int length,
 static void down2_symodd(const uint8_t *const input, int length,
                          uint8_t *output) {
   // Actual filter len = 2 * filter_len_half - 1.
-  static const int16_t *filter = vp9_down2_symodd_half_filter;
+  const int16_t *filter = vp9_down2_symodd_half_filter;
   const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2;
   int i, j;
   uint8_t *optr = output;
@@ -529,6 +529,302 @@ void vp9_resize_plane(const uint8_t *const input,
   free(arrbuf);
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_interpolate(const uint16_t *const input, int inlength,
+                               uint16_t *output, int outlength, int bd) {
+  const int64_t delta =
+      (((uint64_t)inlength << 32) + outlength / 2) / outlength;
+  const int64_t offset = inlength > outlength ?
+      (((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength :
+      -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength;
+  uint16_t *optr = output;
+  int x, x1, x2, sum, k, int_pel, sub_pel;
+  int64_t y;
+
+  const interp_kernel *interp_filters =
+      choose_interp_filter(inlength, outlength);
+
+  x = 0;
+  y = offset;
+  while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
+    x++;
+    y += delta;
+  }
+  x1 = x;
+  x = outlength - 1;
+  y = delta * x + offset;
+  while ((y >> INTERP_PRECISION_BITS) +
+         (int64_t)(INTERP_TAPS / 2) >= inlength) {
+    x--;
+    y -= delta;
+  }
+  x2 = x;
+  if (x1 > x2) {
+    for (x = 0, y = offset; x < outlength; ++x, y += delta) {
+      const int16_t *filter;
+      int_pel = y >> INTERP_PRECISION_BITS;
+      sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+      filter = interp_filters[sub_pel];
+      sum = 0;
+      for (k = 0; k < INTERP_TAPS; ++k) {
+        const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
+        sum += filter[k] *
+            input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
+      }
+      *optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+    }
+  } else {
+    // Initial part.
+    for (x = 0, y = offset; x < x1; ++x, y += delta) {
+      const int16_t *filter;
+      int_pel = y >> INTERP_PRECISION_BITS;
+      sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+      filter = interp_filters[sub_pel];
+      sum = 0;
+      for (k = 0; k < INTERP_TAPS; ++k)
+        sum += filter[k] *
+            input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
+                   0 : int_pel - INTERP_TAPS / 2 + 1 + k)];
+      *optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+    }
+    // Middle part.
+    for (; x <= x2; ++x, y += delta) {
+      const int16_t *filter;
+      int_pel = y >> INTERP_PRECISION_BITS;
+      sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+      filter = interp_filters[sub_pel];
+      sum = 0;
+      for (k = 0; k < INTERP_TAPS; ++k)
+        sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
+      *optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+    }
+    // End part.
+    for (; x < outlength; ++x, y += delta) {
+      const int16_t *filter;
+      int_pel = y >> INTERP_PRECISION_BITS;
+      sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+      filter = interp_filters[sub_pel];
+      sum = 0;
+      for (k = 0; k < INTERP_TAPS; ++k)
+        sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
+                                  inlength ? inlength - 1 :
+                                  int_pel - INTERP_TAPS / 2 + 1 + k)];
+      *optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+    }
+  }
+}
+
+static void highbd_down2_symeven(const uint16_t *const input, int length,
+                                 uint16_t *output, int bd) {
+  // Actual filter len = 2 * filter_len_half.
+  const int16_t *filter = vp9_down2_symeven_half_filter;
+  const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2;
+  int i, j;
+  uint16_t *optr = output;
+  int l1 = filter_len_half;
+  int l2 = (length - filter_len_half);
+  l1 += (l1 & 1);
+  l2 += (l2 & 1);
+  if (l1 > l2) {
+    // Short input length.
+    for (i = 0; i < length; i += 2) {
+      int sum = (1 << (FILTER_BITS - 1));
+      for (j = 0; j < filter_len_half; ++j) {
+        sum += (input[(i - j < 0 ? 0 : i - j)] +
+                input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
+            filter[j];
+      }
+      sum >>= FILTER_BITS;
+      *optr++ = clip_pixel_high(sum, bd);
+    }
+  } else {
+    // Initial part.
+    for (i = 0; i < l1; i += 2) {
+      int sum = (1 << (FILTER_BITS - 1));
+      for (j = 0; j < filter_len_half; ++j) {
+        sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j];
+      }
+      sum >>= FILTER_BITS;
+      *optr++ = clip_pixel_high(sum, bd);
+    }
+    // Middle part.
+    for (; i < l2; i += 2) {
+      int sum = (1 << (FILTER_BITS - 1));
+      for (j = 0; j < filter_len_half; ++j) {
+        sum += (input[i - j] + input[i + 1 + j]) * filter[j];
+      }
+      sum >>= FILTER_BITS;
+      *optr++ = clip_pixel_high(sum, bd);
+    }
+    // End part.
+    for (; i < length; i += 2) {
+      int sum = (1 << (FILTER_BITS - 1));
+      for (j = 0; j < filter_len_half; ++j) {
+        sum += (input[i - j] +
+                input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
+            filter[j];
+      }
+      sum >>= FILTER_BITS;
+      *optr++ = clip_pixel_high(sum, bd);
+    }
+  }
+}
+
+static void highbd_down2_symodd(const uint16_t *const input, int length,
+                                uint16_t *output, int bd) {
+  // Actual filter len = 2 * filter_len_half - 1.
+  const int16_t *filter = vp9_down2_symodd_half_filter;
+  const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2;
+  int i, j;
+  uint16_t *optr = output;
+  int l1 = filter_len_half - 1;
+  int l2 = (length - filter_len_half + 1);
+  l1 += (l1 & 1);
+  l2 += (l2 & 1);
+  if (l1 > l2) {
+    // Short input length.
+    for (i = 0; i < length; i += 2) {
+      int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+      for (j = 1; j < filter_len_half; ++j) {
+        sum += (input[(i - j < 0 ? 0 : i - j)] +
+                input[(i + j >= length ? length - 1 : i + j)]) *
+            filter[j];
+      }
+      sum >>= FILTER_BITS;
+      *optr++ = clip_pixel_high(sum, bd);
+    }
+  } else {
+    // Initial part.
+    for (i = 0; i < l1; i += 2) {
+      int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+      for (j = 1; j < filter_len_half; ++j) {
+        sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
+      }
+      sum >>= FILTER_BITS;
+      *optr++ = clip_pixel_high(sum, bd);
+    }
+    // Middle part.
+    for (; i < l2; i += 2) {
+      int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+      for (j = 1; j < filter_len_half; ++j) {
+        sum += (input[i - j] + input[i + j]) * filter[j];
+      }
+      sum >>= FILTER_BITS;
+      *optr++ = clip_pixel_high(sum, bd);
+    }
+    // End part.
+    for (; i < length; i += 2) {
+      int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+      for (j = 1; j < filter_len_half; ++j) {
+        sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
+            filter[j];
+      }
+      sum >>= FILTER_BITS;
+      *optr++ = clip_pixel_high(sum, bd);
+    }
+  }
+}
+
+static void highbd_resize_multistep(const uint16_t *const input,
+                                    int length,
+                                    uint16_t *output,
+                                    int olength,
+                                    uint16_t *buf,
+                                    int bd) {
+  int steps;
+  if (length == olength) {
+    memcpy(output, input, sizeof(uint16_t) * length);
+    return;
+  }
+  steps = get_down2_steps(length, olength);
+
+  if (steps > 0) {
+    int s;
+    uint16_t *out = NULL;
+    uint16_t *tmpbuf = NULL;
+    uint16_t *otmp, *otmp2;
+    int filteredlength = length;
+    if (buf == NULL) {  // No caller scratch buffer: allocate our own.
+      tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length);
+      otmp = tmpbuf;
+    } else {
+      otmp = buf;
+    }
+    otmp2 = otmp + get_down2_length(length, 1);
+    for (s = 0; s < steps; ++s) {
+      const int proj_filteredlength = get_down2_length(filteredlength, 1);
+      const uint16_t *const in = (s == 0 ? input : out);
+      if (s == steps - 1 && proj_filteredlength == olength)
+        out = output;
+      else
+        out = (s & 1 ? otmp2 : otmp);
+      if (filteredlength & 1)
+        highbd_down2_symodd(in, filteredlength, out, bd);
+      else
+        highbd_down2_symeven(in, filteredlength, out, bd);
+      filteredlength = proj_filteredlength;
+    }
+    if (filteredlength != olength) {
+      highbd_interpolate(out, filteredlength, output, olength, bd);
+    }
+    if (tmpbuf)
+      free(tmpbuf);
+  } else {
+    highbd_interpolate(input, length, output, olength, bd);
+  }
+}
+
+static void highbd_fill_col_to_arr(uint16_t *img, int stride, int len,
+                                   uint16_t *arr) {
+  int i;
+  uint16_t *iptr = img;
+  uint16_t *aptr = arr;
+  for (i = 0; i < len; ++i, iptr += stride) {
+    *aptr++ = *iptr;
+  }
+}
+
+static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len,
+                                   uint16_t *arr) {
+  int i;
+  uint16_t *iptr = img;
+  uint16_t *aptr = arr;
+  for (i = 0; i < len; ++i, iptr += stride) {
+    *iptr = *aptr++;
+  }
+}
+
+void vp9_highbd_resize_plane(const uint8_t *const input,
+                             int height,
+                             int width,
+                             int in_stride,
+                             uint8_t *output,
+                             int height2,
+                             int width2,
+                             int out_stride,
+                             int bd) {
+  int i;
+  uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height);
+  uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
+                                        (width < height ? height : width));
+  uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2));
+  for (i = 0; i < height; ++i) {
+    highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
+                            intbuf + width2 * i, width2, tmpbuf, bd);
+  }
+  for (i = 0; i < width2; ++i) {
+    highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
+    highbd_resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf,
+                            bd);
+    highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
+                           arrbuf + height);
+  }
+  free(intbuf);
+  free(tmpbuf);
+  free(arrbuf);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 void vp9_resize_frame420(const uint8_t *const y,
                          int y_stride,
                          const uint8_t *const u, const uint8_t *const v,
@@ -574,3 +870,51 @@ void vp9_resize_frame444(const uint8_t *const y, int y_stride,
   vp9_resize_plane(v, height, width, uv_stride,
                    ov, oheight, owidth, ouv_stride);
 }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_resize_frame420(const uint8_t *const y,
+                                int y_stride,
+                                const uint8_t *const u, const uint8_t *const v,
+                                int uv_stride,
+                                int height, int width,
+                                uint8_t *oy, int oy_stride,
+                                uint8_t *ou, uint8_t *ov, int ouv_stride,
+                                int oheight, int owidth, int bd) {
+  vp9_highbd_resize_plane(y, height, width, y_stride,
+                          oy, oheight, owidth, oy_stride, bd);
+  vp9_highbd_resize_plane(u, height / 2, width / 2, uv_stride,
+                          ou, oheight / 2, owidth / 2, ouv_stride, bd);
+  vp9_highbd_resize_plane(v, height / 2, width / 2, uv_stride,
+                          ov, oheight / 2, owidth / 2, ouv_stride, bd);
+}
+
+void vp9_highbd_resize_frame422(const uint8_t *const y, int y_stride,
+                                const uint8_t *const u, const uint8_t *const v,
+                                int uv_stride,
+                                int height, int width,
+                                uint8_t *oy, int oy_stride,
+                                uint8_t *ou, uint8_t *ov, int ouv_stride,
+                                int oheight, int owidth, int bd) {
+  vp9_highbd_resize_plane(y, height, width, y_stride,
+                          oy, oheight, owidth, oy_stride, bd);
+  vp9_highbd_resize_plane(u, height, width / 2, uv_stride,
+                          ou, oheight, owidth / 2, ouv_stride, bd);
+  vp9_highbd_resize_plane(v, height, width / 2, uv_stride,
+                          ov, oheight, owidth / 2, ouv_stride, bd);
+}
+
+void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride,
+                                const uint8_t *const u, const uint8_t *const v,
+                                int uv_stride,
+                                int height, int width,
+                                uint8_t *oy, int oy_stride,
+                                uint8_t *ou, uint8_t *ov, int ouv_stride,
+                                int oheight, int owidth, int bd) {
+  vp9_highbd_resize_plane(y, height, width, y_stride,
+                          oy, oheight, owidth, oy_stride, bd);
+  vp9_highbd_resize_plane(u, height, width, uv_stride,
+                          ou, oheight, owidth, ouv_stride, bd);
+  vp9_highbd_resize_plane(v, height, width, uv_stride,
+                          ov, oheight, owidth, ouv_stride, bd);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/encoder/vp9_resize.h b/vp9/encoder/vp9_resize.h
index 1818cd47e..067af53f9 100644
--- a/vp9/encoder/vp9_resize.h
+++ b/vp9/encoder/vp9_resize.h
@@ -65,4 +65,60 @@ void vp9_resize_frame444(const uint8_t *const y,
                          int oheight,
                          int owidth);
 
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_resize_plane(const uint8_t *const input,
+                             int height,
+                             int width,
+                             int in_stride,
+                             uint8_t *output,
+                             int height2,
+                             int width2,
+                             int out_stride,
+                             int bd);
+void vp9_highbd_resize_frame420(const uint8_t *const y,
+                                int y_stride,
+                                const uint8_t *const u,
+                                const uint8_t *const v,
+                                int uv_stride,
+                                int height,
+                                int width,
+                                uint8_t *oy,
+                                int oy_stride,
+                                uint8_t *ou,
+                                uint8_t *ov,
+                                int ouv_stride,
+                                int oheight,
+                                int owidth,
+                                int bd);
+void vp9_highbd_resize_frame422(const uint8_t *const y,
+                                int y_stride,
+                                const uint8_t *const u,
+                                const uint8_t *const v,
+                                int uv_stride,
+                                int height,
+                                int width,
+                                uint8_t *oy,
+                                int oy_stride,
+                                uint8_t *ou,
+                                uint8_t *ov,
+                                int ouv_stride,
+                                int oheight,
+                                int owidth,
+                                int bd);
+void vp9_highbd_resize_frame444(const uint8_t *const y,
+                                int y_stride,
+                                const uint8_t *const u,
+                                const uint8_t *const v,
+                                int uv_stride,
+                                int height,
+                                int width,
+                                uint8_t *oy,
+                                int oy_stride,
+                                uint8_t *ou,
+                                uint8_t *ov,
+                                int ouv_stride,
+                                int oheight,
+                                int owidth,
+                                int bd);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // VP9_ENCODER_VP9_RESIZE_H_
-- 
2.40.0