From b6c5dbe9ef38c8e90c7b3930ab8e336051b67708 Mon Sep 17 00:00:00 2001
From: Jim Bankoski
Date: Sun, 29 Sep 2013 10:27:11 -0700
Subject: [PATCH] mips dsp-ase r2 vp9 decoder extend module optimizations

Adopted again to shepherd through system.

Change-Id: If1b742618a95a2198ae2e30eec4b53ad1f7353e5
---
 build/make/rtcd.sh                      |   2 +
 vpx_scale/mips/dspr2/yv12extend_dspr2.c | 185 ++++++++++++++++++++++++
 vpx_scale/vpx_scale.mk                  |   3 +
 vpx_scale/vpx_scale_rtcd.sh             |   4 +-
 4 files changed, 192 insertions(+), 2 deletions(-)
 create mode 100644 vpx_scale/mips/dspr2/yv12extend_dspr2.c

diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh
index 6cc36843b..9a8d97e32 100755
--- a/build/make/rtcd.sh
+++ b/build/make/rtcd.sh
@@ -290,9 +290,11 @@ static void setup_rtcd_internal(void)
 {
 $(set_function_pointers c $ALL_ARCHS)
 #if HAVE_DSPR2
+#if CONFIG_VP8
 void dsputil_static_init();
 dsputil_static_init();
 #endif
+#endif
 }
 #endif
 $(common_bottom)
diff --git a/vpx_scale/mips/dspr2/yv12extend_dspr2.c b/vpx_scale/mips/dspr2/yv12extend_dspr2.c
new file mode 100644
index 000000000..2c5cd1a87
--- /dev/null
+++ b/vpx_scale/mips/dspr2/yv12extend_dspr2.c
@@ -0,0 +1,185 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_scale/yv12config.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_scale/vpx_scale.h"
+
+#if HAVE_DSPR2
+/* Stores |count| copies of a pixel starting at |dst|. |pix| must hold that
+ * pixel replicated in all four of its bytes. A |count| of zero or less
+ * stores nothing.
+ *
+ * "sw" raises an address error on MIPS when its target is not word aligned,
+ * so bytes are stored singly until |dst| reaches a 4-byte boundary and again
+ * for the tail that is shorter than a word.
+ */
+static void fill_border(uint8_t *dst, uint32_t pix, int count) {
+  /* unaligned head */
+  for (; count > 0 && ((uintptr_t)dst & 3); --count) {
+    __asm__ __volatile__ (
+        "sb     %[pix],     0(%[dst])     \n\t"
+
+        :
+        : [dst] "r" (dst), [pix] "r" (pix)
+        : "memory"
+    );
+
+    dst += 1;
+  }
+
+  /* aligned body, four pixels per store */
+  for (; count >= 4; count -= 4) {
+    __asm__ __volatile__ (
+        "sw     %[pix],     0(%[dst])     \n\t"
+
+        :
+        : [dst] "r" (dst), [pix] "r" (pix)
+        : "memory"
+    );
+
+    dst += 4;
+  }
+
+  /* tail of fewer than four pixels */
+  for (; count > 0; --count) {
+    __asm__ __volatile__ (
+        "sb     %[pix],     0(%[dst])     \n\t"
+
+        :
+        : [dst] "r" (dst), [pix] "r" (pix)
+        : "memory"
+    );
+
+    dst += 1;
+  }
+}
+
+/* Extends a |width| x |height| plane whose top-left pixel is |src| by
+ * replicating its edge pixels: every row is widened by |extend_left| and
+ * |extend_right| pixels, then the widened first and last rows are copied
+ * into the |extend_top| rows above and the |extend_bottom| rows below.
+ */
+static void extend_plane(uint8_t *const src, int src_stride,
+                         int width, int height,
+                         int extend_top, int extend_left,
+                         int extend_bottom, int extend_right) {
+  int i;
+  uint8_t *left_src, *right_src;
+  uint8_t *left_dst, *right_dst;
+  uint8_t *top_src, *bot_src;
+  uint8_t *top_dst, *bot_dst;
+  uint32_t left_pix;
+  uint32_t right_pix;
+  uint32_t linesize;
+
+  /* copy the left and right most columns out */
+  left_src = src;
+  right_src = src + width - 1;
+  left_dst = src - extend_left;
+  right_dst = src + width;
+
+  for (i = height; i--; ) {
+    /* load the two edge pixels and replicate each into all four bytes */
+    __asm__ __volatile__ (
+        "lb        %[left_pix],     0(%[left_src])      \n\t"
+        "lb        %[right_pix],    0(%[right_src])     \n\t"
+        "replv.qb  %[left_pix],     %[left_pix]         \n\t"
+        "replv.qb  %[right_pix],    %[right_pix]        \n\t"
+
+        : [left_pix] "=&r" (left_pix), [right_pix] "=&r" (right_pix)
+        : [left_src] "r" (left_src), [right_src] "r" (right_src)
+        : "memory"
+    );
+
+    /* The right border is wider than the left one whenever the frame is
+     * cropped, so each side is filled with its own length.
+     */
+    fill_border(left_dst, left_pix, extend_left);
+    fill_border(right_dst, right_pix, extend_right);
+
+    left_src += src_stride;
+    right_src += src_stride;
+    left_dst += src_stride;
+    right_dst += src_stride;
+  }
+
+  /* Now copy the top and bottom lines into each line of the respective
+   * borders
+   */
+  top_src = src - extend_left;
+  bot_src = src + src_stride * (height - 1) - extend_left;
+  top_dst = src + src_stride * (-extend_top) - extend_left;
+  bot_dst = src + src_stride * (height) - extend_left;
+  linesize = extend_left + extend_right + width;
+
+  for (i = 0; i < extend_top; i++) {
+    vpx_memcpy(top_dst, top_src, linesize);
+    top_dst += src_stride;
+  }
+
+  for (i = 0; i < extend_bottom; i++) {
+    vpx_memcpy(bot_dst, bot_src, linesize);
+    bot_dst += src_stride;
+  }
+}
+
+/* Extends the Y, U and V planes of |ybf| by |ext_size| luma pixels (scaled
+ * by the subsampling shifts for chroma), plus the difference between the
+ * allocated and the cropped frame size on the bottom and right.
+ */
+static void extend_frame(YV12_BUFFER_CONFIG *const ybf,
+                         int subsampling_x, int subsampling_y,
+                         int ext_size) {
+  const int c_w = (ybf->y_crop_width + subsampling_x) >> subsampling_x;
+  const int c_h = (ybf->y_crop_height + subsampling_y) >> subsampling_y;
+  const int c_et = ext_size >> subsampling_y;
+  const int c_el = ext_size >> subsampling_x;
+  const int c_eb = (ext_size + ybf->y_height - ybf->y_crop_height +
+                    subsampling_y) >> subsampling_y;
+  const int c_er = (ext_size + ybf->y_width - ybf->y_crop_width +
+                    subsampling_x) >> subsampling_x;
+
+  assert(ybf->y_height - ybf->y_crop_height < 16);
+  assert(ybf->y_width - ybf->y_crop_width < 16);
+  assert(ybf->y_height - ybf->y_crop_height >= 0);
+  assert(ybf->y_width - ybf->y_crop_width >= 0);
+
+  extend_plane(ybf->y_buffer, ybf->y_stride,
+               ybf->y_crop_width, ybf->y_crop_height,
+               ext_size, ext_size,
+               ext_size + ybf->y_height - ybf->y_crop_height,
+               ext_size + ybf->y_width - ybf->y_crop_width);
+
+  extend_plane(ybf->u_buffer, ybf->uv_stride,
+               c_w, c_h, c_et, c_el, c_eb, c_er);
+
+  extend_plane(ybf->v_buffer, ybf->uv_stride,
+               c_w, c_h, c_et, c_el, c_eb, c_er);
+}
+
+/* Extends all three planes using ybf->border as the extension size. */
+void vp9_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
+                                    int subsampling_x, int subsampling_y) {
+  extend_frame(ybf, subsampling_x, subsampling_y, ybf->border);
+}
+
+/* Extends all three planes using the smaller of ybf->border and
+ * VP9INNERBORDERINPIXELS as the extension size.
+ */
+void vp9_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
+                                          int subsampling_x,
+                                          int subsampling_y) {
+  const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ?
+                       VP9INNERBORDERINPIXELS : ybf->border;
+  extend_frame(ybf, subsampling_x, subsampling_y, inner_bw);
+}
+#endif
diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk
index 76c11e792..50d3e9d8e 100644
--- a/vpx_scale/vpx_scale.mk
+++ b/vpx_scale/vpx_scale.mk
@@ -16,6 +16,9 @@ SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM)
 SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM)
 SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c
 
+#mips(dspr2)
+SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c
+
 SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes)
 
 $(eval $(call asm_offsets_template,\
diff --git a/vpx_scale/vpx_scale_rtcd.sh b/vpx_scale/vpx_scale_rtcd.sh
index ea7b0e2e8..a5faf1148 100644
--- a/vpx_scale/vpx_scale_rtcd.sh
+++ b/vpx_scale/vpx_scale_rtcd.sh
@@ -27,8 +27,8 @@ specialize vpx_yv12_copy_y neon
 
 if [ "$CONFIG_VP9" = "yes" ]; then
     prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf, int subsampling_x, int subsampling_y"
-    specialize vp9_extend_frame_borders
+    specialize vp9_extend_frame_borders dspr2
 
     prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf, int subsampling_x, int subsampling_y"
-    specialize vp9_extend_frame_inner_borders_c
+    specialize vp9_extend_frame_inner_borders dspr2
 fi
-- 
2.40.0