From 577d4fa792f6e12124b116b52ec23294201f18bc Mon Sep 17 00:00:00 2001 From: Shiyou Yin Date: Wed, 13 Sep 2017 16:20:21 +0800 Subject: [PATCH] vp8: [loongson] optimize idct with mmi 1. vp8_dequant_idct_add_y_block_mmi 2. vp8_dequant_idct_add_uv_block_mmi Change-Id: I9987147be2685ac79d4b045d1d56f6709ee1223c --- vp8/common/mips/mmi/idct_blk_mmi.c | 71 ++++++++++++++++++++++++++++++ vp8/common/rtcd_defs.pl | 4 +- vp8/vp8_common.mk | 1 + 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 vp8/common/mips/mmi/idct_blk_mmi.c diff --git a/vp8/common/mips/mmi/idct_blk_mmi.c b/vp8/common/mips/mmi/idct_blk_mmi.c new file mode 100644 index 000000000..f6020ab46 --- /dev/null +++ b/vp8/common/mips/mmi/idct_blk_mmi.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+/* eobs stays char * so the definitions match the protos in rtcd_defs.pl. */
+#include "./vp8_rtcd.h"
+#include "vpx_mem/vpx_mem.h"
+/* Runs the idct-add kernels over a 4x4 grid of blocks; one eobs entry each. */
+void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
+                                      int stride, char *eobs) {
+  int i, j;
+  /* eobs > 1 takes the full kernel; otherwise only q[0] * dq[0] is used. */
+  for (i = 0; i < 4; i++) {
+    for (j = 0; j < 4; j++) {
+      if (*eobs++ > 1) {
+        vp8_dequant_idct_add_mmi(q, dq, dst, stride);
+      } else {
+        vp8_dc_only_idct_add_mmi(q[0] * dq[0], dst, stride, dst, stride);
+        memset(q, 0, 2 * sizeof(q[0])); /* zero q[0] and q[1] */
+      }
+      /* Next block: 16 coefficients on in q, 4 bytes on in dst. */
+      q += 16;
+      dst += 4;
+    }
+    /* Undo the 16-byte advance and step dst by 4 strides. */
+    dst += 4 * stride - 16;
+  }
+}
+/* Same per-block logic over two 2x2 grids: dstu first, then dstv. */
+void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dstu,
+                                       uint8_t *dstv, int stride,
+                                       char *eobs) {
+  int i, j;
+  /* First grid writes through dstu. */
+  for (i = 0; i < 2; i++) {
+    for (j = 0; j < 2; j++) {
+      if (*eobs++ > 1) {
+        vp8_dequant_idct_add_mmi(q, dq, dstu, stride);
+      } else {
+        vp8_dc_only_idct_add_mmi(q[0] * dq[0], dstu, stride, dstu, stride);
+        memset(q, 0, 2 * sizeof(q[0])); /* zero q[0] and q[1] */
+      }
+      /* Next block: 16 coefficients on in q, 4 bytes on in dstu. */
+      q += 16;
+      dstu += 4;
+    }
+    /* Undo the 8-byte advance and step dstu by 4 strides. */
+    dstu += 4 * stride - 8;
+  }
+  /* Second grid writes through dstv; q and eobs carry on from above. */
+  for (i = 0; i < 2; i++) {
+    for (j = 0; j < 2; j++) {
+      if (*eobs++ > 1) {
+        vp8_dequant_idct_add_mmi(q, dq, dstv, stride);
+      } else {
+        vp8_dc_only_idct_add_mmi(q[0] * dq[0], dstv, stride, dstv, stride);
+        memset(q, 0, 2 * sizeof(q[0])); /* zero q[0] and q[1] */
+      }
+      /* Next block: 16 coefficients on in q, 4 bytes on in dstv. */
+      q += 16;
+      dstv += 4;
+    }
+    /* Undo the 8-byte advance and step dstv by 4 strides. */
+    dstv += 4 * stride - 8;
+  }
+}
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index be2ac0054..ece2785eb 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -28,10 +28,10 @@ add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char
 specialize qw/vp8_dequant_idct_add mmx neon dspr2 msa mmi/;
 
 add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
-specialize qw/vp8_dequant_idct_add_y_block sse2 neon dspr2 msa/;
+specialize qw/vp8_dequant_idct_add_y_block sse2 neon dspr2 msa mmi/;
 
 add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
-specialize
qw/vp8_dequant_idct_add_uv_block sse2 neon dspr2 msa/; +specialize qw/vp8_dequant_idct_add_uv_block sse2 neon dspr2 msa mmi/; # # Loopfilter diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 5813c81c4..246fe6a67 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -122,6 +122,7 @@ VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/loopfilter_filters_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/idctllm_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/dequantize_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/copymem_mmi.c +VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/idct_blk_mmi.c ifeq ($(CONFIG_POSTPROC),yes) VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c -- 2.40.0