From: Shiyou Yin Date: Wed, 6 Sep 2017 09:57:16 +0000 (+0800) Subject: vp8: [loongson] optimize copymem with mmi X-Git-Tag: v1.7.0~148^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=73102d1ed20d79a22b977555e775d199101de53b;p=libvpx vp8: [loongson] optimize copymem with mmi 1. vp8_copy_mem16x16_mmi 2. vp8_copy_mem8x8_mmi 3. vp8_copy_mem8x4_mmi Change-Id: I3de29a11fa7402df0e48bbb944440b1e66498a65 --- diff --git a/vp8/common/mips/mmi/copymem_mmi.c b/vp8/common/mips/mmi/copymem_mmi.c new file mode 100644 index 000000000..86a32aa9e --- /dev/null +++ b/vp8/common/mips/mmi/copymem_mmi.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp8_rtcd.h" +#include "vpx_ports/asmdefs_mmi.h" +/* Asm fragment: copies two 16-byte rows from src to dst. Per row, bytes 0x00-0x07 travel through an FP register (gsldlc1/gsldrc1 load, gssdlc1/gssdrc1 store) and bytes 0x08-0x0f through an integer register (ldl/ldr load, sdl/sdr store); src and dst are then advanced by src_stride and dst_stride via MMI_ADDU. Row 0 uses ftmp0/tmp0 and row 1 uses ftmp1/tmp1. The left/right instruction pairs are the MIPS idiom for 64-bit accesses that need not be aligned. */ +#define COPY_MEM_16X2 \ + "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t" \ + "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t" \ + "ldl %[tmp0], 0x0f(%[src]) \n\t" \ + "ldr %[tmp0], 0x08(%[src]) \n\t" \ + MMI_ADDU(%[src], %[src], %[src_stride]) \ + "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t" \ + "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t" \ + "sdl %[tmp0], 0x0f(%[dst]) \n\t" \ + "sdr %[tmp0], 0x08(%[dst]) \n\t" \ + MMI_ADDU(%[dst], %[dst], %[dst_stride]) \ + "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ + "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ + "ldl %[tmp1], 0x0f(%[src]) \n\t" \ + "ldr %[tmp1], 0x08(%[src]) \n\t" \ + MMI_ADDU(%[src], %[src], %[src_stride]) \ + "gssdlc1 %[ftmp1], 0x07(%[dst]) \n\t" \ + "gssdrc1 %[ftmp1], 0x00(%[dst]) \n\t" \ + "sdl %[tmp1], 0x0f(%[dst]) \n\t" \ + "sdr %[tmp1], 0x08(%[dst]) \n\t" \ + MMI_ADDU(%[dst], %[dst], %[dst_stride]) +/* Asm fragment: copies two 8-byte rows from src to dst. Row 0 is loaded into ftmp0 and row 1 into tmp0, with src advanced by src_stride after each load; only then are the two rows stored, with dst advanced by dst_stride after each store. */ +#define COPY_MEM_8X2 \ + "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t" \ + "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t" \ + MMI_ADDU(%[src], %[src], %[src_stride]) \ + "ldl %[tmp0], 0x07(%[src]) \n\t" \ + "ldr %[tmp0], 0x00(%[src]) \n\t" \ + MMI_ADDU(%[src], %[src], %[src_stride]) \ + \ + "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t" \ + "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t" \ + MMI_ADDU(%[dst], %[dst], %[dst_stride]) \ + "sdl %[tmp0], 0x07(%[dst]) \n\t" \ + "sdr %[tmp0], 0x00(%[dst]) \n\t" \ + MMI_ADDU(%[dst], %[dst], %[dst_stride]) +/* Copies a block of 16 rows by 16 bytes from src to dst. The asm loop runs loop_count = 4 times and each pass expands COPY_MEM_16X2 twice, i.e. 4 rows per pass. src_stride and dst_stride are added to the byte pointers after every row, so they are the byte distance between consecutive rows; they are widened to mips_reg for the asm. src, dst and loop_count are read-write operands, so the asm works on local copies of the arguments. The memory clobber tells the compiler that the asm reads and writes memory. */ +void vp8_copy_mem16x16_mmi(unsigned char *src, int src_stride, + unsigned char *dst, int dst_stride) { + double ftmp[2]; + uint64_t tmp[2]; + uint8_t loop_count = 4; + + /* clang-format off */ + __asm__ volatile ( + "1: \n\t" + COPY_MEM_16X2 + COPY_MEM_16X2 + MMI_ADDIU(%[loop_count], %[loop_count], -0x01) + "bnez %[loop_count], 1b \n\t" + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), + [loop_count]"+&r"(loop_count), + [dst]"+&r"(dst), [src]"+&r"(src) + : [src_stride]"r"((mips_reg)src_stride), 
+ [dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); + /* clang-format on */ +} +/* Copies a block of 8 rows by 8 bytes from src to dst: loop_count = 4 passes of COPY_MEM_8X2, 2 rows per pass. Strides are the byte distance between consecutive rows. NOTE(review): ftmp[1] is bound as operand ftmp1, but COPY_MEM_8X2 never references ftmp1. */ +void vp8_copy_mem8x8_mmi(unsigned char *src, int src_stride, unsigned char *dst, + int dst_stride) { + double ftmp[2]; + uint64_t tmp[1]; + uint8_t loop_count = 4; + + /* clang-format off */ + __asm__ volatile ( + "1: \n\t" + COPY_MEM_8X2 + MMI_ADDIU(%[loop_count], %[loop_count], -0x01) + "bnez %[loop_count], 1b \n\t" + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [tmp0]"=&r"(tmp[0]), [loop_count]"+&r"(loop_count), + [dst]"+&r"(dst), [src]"+&r"(src) + : [src_stride]"r"((mips_reg)src_stride), + [dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); + /* clang-format on */ +} +/* Copies a block of 4 rows by 8 bytes from src to dst using two straight-line expansions of COPY_MEM_8X2; there is no loop and no counter. Strides are the byte distance between consecutive rows. NOTE(review): ftmp[1] is bound as operand ftmp1, but COPY_MEM_8X2 never references ftmp1. */ +void vp8_copy_mem8x4_mmi(unsigned char *src, int src_stride, unsigned char *dst, + int dst_stride) { + double ftmp[2]; + uint64_t tmp[1]; + + /* clang-format off */ + __asm__ volatile ( + COPY_MEM_8X2 + COPY_MEM_8X2 + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [tmp0]"=&r"(tmp[0]), + [dst]"+&r"(dst), [src]"+&r"(src) + : [src_stride]"r"((mips_reg)src_stride), + [dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); + /* clang-format on */ +} diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl index ce5782363..7a04ef17f 100644 --- a/vp8/common/rtcd_defs.pl +++ b/vp8/common/rtcd_defs.pl @@ -104,13 +104,13 @@ specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa mmi/; # RECON # add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; -specialize qw/vp8_copy_mem16x16 sse2 neon dspr2 msa/; +specialize qw/vp8_copy_mem16x16 sse2 neon dspr2 msa mmi/; add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; -specialize qw/vp8_copy_mem8x8 mmx neon dspr2 msa/; +specialize qw/vp8_copy_mem8x8 mmx neon dspr2 msa mmi/; add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; -specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa/; +specialize 
qw/vp8_copy_mem8x4 mmx neon dspr2 msa mmi/; # # Postproc diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 3aa3b7db6..5813c81c4 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -121,6 +121,7 @@ VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/sixtap_filter_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/loopfilter_filters_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/idctllm_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/dequantize_mmi.c +VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/copymem_mmi.c ifeq ($(CONFIG_POSTPROC),yes) VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c