From: Shiyou Yin Date: Wed, 6 Sep 2017 00:51:21 +0000 (+0800) Subject: vp8: [loongson] optimize idctllm with mmi X-Git-Tag: v1.7.0~172 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5b558592f564be258671f73175d5a4f987cf321c;p=libvpx vp8: [loongson] optimize idctllm with mmi 1. vp8_short_idct4x4llm_mmi 2. vp8_short_inv_walsh4x4_mmi 3. vp8_dc_only_idct_add_mmi Change-Id: I616923681e79d78607a4988608fc39df77b093f4 --- diff --git a/test/idct_test.cc b/test/idct_test.cc index b58c29a3e..3700374d7 100644 --- a/test/idct_test.cc +++ b/test/idct_test.cc @@ -169,4 +169,9 @@ INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, INSTANTIATE_TEST_CASE_P(MSA, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_msa)); #endif // HAVE_MSA + +#if HAVE_MMI +INSTANTIATE_TEST_CASE_P(MMI, IDCTTest, + ::testing::Values(vp8_short_idct4x4llm_mmi)); +#endif // HAVE_MMI } diff --git a/vp8/common/mips/mmi/idctllm_mmi.c b/vp8/common/mips/mmi/idctllm_mmi.c new file mode 100644 index 000000000..5e48f5916 --- /dev/null +++ b/vp8/common/mips/mmi/idctllm_mmi.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp8_rtcd.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/asmdefs_mmi.h" + +#define TRANSPOSE_4H \ + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ + MMI_LI(%[tmp0], 0x93) \ + "mtc1 %[tmp0], %[ftmp10] \n\t" \ + "punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ + "punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ + "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ + "or %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \ + "punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \ + "punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ + "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ + "or %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \ + "punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ + "punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ + "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ + "or %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \ + "punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ + "punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ + "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ + "or %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ + "punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \ + "punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \ + "punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \ + "punpckhwd %[ftmp4], %[ftmp6], %[ftmp8] \n\t" + +void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr, + int pred_stride, unsigned char *dst_ptr, + int dst_stride) { + double ftmp[12]; + uint32_t tmp[0]; + DECLARE_ALIGNED(8, const uint64_t, ff_ph_04) = { 0x0004000400040004ULL }; + DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = { 0x4e7b4e7b4e7b4e7bULL }; + DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = { 0x22a322a322a322a3ULL }; + + __asm__ volatile ( + MMI_LI(%[tmp0], 0x02) + "mtc1 %[tmp0], %[ftmp11] \n\t" + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + + "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" + "gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t" + "gsldrc1 %[ftmp2], 0x08(%[ip]) \n\t" + "gsldlc1 %[ftmp3], 0x17(%[ip]) \n\t" + "gsldrc1 %[ftmp3], 0x10(%[ip]) \n\t" + "gsldlc1 %[ftmp4], 0x1f(%[ip]) \n\t" + "gsldrc1 %[ftmp4], 0x18(%[ip]) \n\t" + + // ip[0...3] + ip[8...11] + "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t" + // ip[0...3] - ip[8...11] + "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t" + // (ip[12...15] * sinpi8sqrt2) >> 16 + "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t" + "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t" + // (ip[ 4... 7] * sinpi8sqrt2) >> 16 + "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t" + "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t" + // ip[ 4... 7] + ((ip[ 4... 7] * cospi8sqrt2minus1) >> 16) + "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t" + "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t" + // ip[12...15] + ((ip[12...15] * cospi8sqrt2minus1) >> 16) + "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t" + "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t" + + "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" + "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" + "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t" + "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t" + "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t" + "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t" + "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t" + "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t" + + TRANSPOSE_4H + // a + "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t" + // b + "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t" + // c + "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t" + "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t" + "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t" + "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t" + "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" + // d + "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t" + "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t" + "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t" + "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t" + "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t" + + MMI_LI(%[tmp0], 0x03) + "mtc1 %[tmp0], %[ftmp11] \n\t" + // a + d + "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t" + "paddh %[ftmp1], %[ftmp1], %[ff_ph_04] \n\t" + "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t" + // b + c + "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t" + "paddh %[ftmp2], %[ftmp2], %[ff_ph_04] \n\t" + "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t" + // b - c + "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t" + "paddh %[ftmp3], %[ftmp3], %[ff_ph_04] \n\t" + "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t" + // a - d + "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t" + "paddh %[ftmp4], %[ftmp4], %[ff_ph_04] \n\t" + "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t" + + TRANSPOSE_4H +#if _MIPS_SIM == _ABIO32 + "ulw %[tmp0], 0x00(%[pred_prt]) \n\t" + "mtc1 %[tmp0], %[ftmp5] \n\t" +#else + "gslwlc1 %[ftmp5], 0x03(%[pred_ptr]) \n\t" + "gslwrc1 %[ftmp5], 0x00(%[pred_ptr]) \n\t" +#endif + "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" + "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" + "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" + "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" + "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" + MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) + MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) + +#if _MIPS_SIM == _ABIO32 + "ulw %[tmp0], 0x00(%[pred_prt]) \n\t" + "mtc1 %[tmp0], %[ftmp6] \n\t" +#else + "gslwlc1 %[ftmp6], 0x03(%[pred_ptr]) \n\t" + "gslwrc1 %[ftmp6], 0x00(%[pred_ptr]) \n\t" +#endif + "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" + "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" + "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" + "gsswlc1 %[ftmp2], 0x03(%[dst_ptr]) \n\t" + "gsswrc1 %[ftmp2], 0x00(%[dst_ptr]) \n\t" + MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) + MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) + +#if _MIPS_SIM == _ABIO32 + "ulw %[tmp0], 0x00(%[pred_prt]) \n\t" + "mtc1 %[tmp0], %[ftmp7] \n\t" +#else + "gslwlc1 %[ftmp7], 0x03(%[pred_ptr]) \n\t" + "gslwrc1 %[ftmp7], 0x00(%[pred_ptr]) \n\t" +#endif + "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" + "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" + "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" + "gsswlc1 %[ftmp3], 0x03(%[dst_ptr]) \n\t" + "gsswrc1 %[ftmp3], 0x00(%[dst_ptr]) \n\t" + MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) + MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) + +#if _MIPS_SIM == _ABIO32 + "ulw %[tmp0], 0x00(%[pred_prt]) \n\t" + "mtc1 %[tmp0], %[ftmp8] \n\t" +#else + "gslwlc1 %[ftmp8], 0x03(%[pred_ptr]) \n\t" + "gslwrc1 %[ftmp8], 0x00(%[pred_ptr]) \n\t" +#endif + "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" + "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" + "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" + "gsswlc1 %[ftmp4], 0x03(%[dst_ptr]) \n\t" + "gsswrc1 %[ftmp4], 0x00(%[dst_ptr]) \n\t" + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), + [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), + [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), + [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), + [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), + [pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr) + : [ip]"r"(input), [ff_ph_22a3]"f"(ff_ph_22a3), + [ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_04]"f"(ff_ph_04), + [pred_stride]"r"((mips_reg)pred_stride), + [dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); +} + +void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr, + int pred_stride, unsigned char *dst_ptr, + int dst_stride) { + int a1 = ((input_dc + 4) >> 3); + double ftmp[5]; + int low32; + + __asm__ volatile ( + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + "pshufh %[a1], %[a1], %[ftmp0] \n\t" + "ulw %[low32], 0x00(%[pred_ptr]) \n\t" + "mtc1 %[low32], %[ftmp1] \n\t" + "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" + "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t" + "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t" + "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" + "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" + + MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) + MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) + "ulw %[low32], 0x00(%[pred_ptr]) \n\t" + "mtc1 %[low32], %[ftmp1] \n\t" + "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" + "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t" + "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t" + "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" + "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" + + MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) + MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) + "ulw %[low32], 0x00(%[pred_ptr]) \n\t" + "mtc1 %[low32], %[ftmp1] \n\t" + "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" + "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t" + "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t" + "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" + "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" + + MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) + MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) + "ulw %[low32], 0x00(%[pred_ptr]) \n\t" + "mtc1 %[low32], %[ftmp1] \n\t" + "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" + "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t" + "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t" + "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" + "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), + [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [low32]"=&r"(low32), + [dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr) + : [dst_stride]"r"((mips_reg)dst_stride), + [pred_stride]"r"((mips_reg)pred_stride), [a1]"f"(a1) + : "memory" + ); +} + +void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) { + int i; + int16_t output[16]; + double ftmp[12]; + uint32_t tmp[1]; + DECLARE_ALIGNED(8, const uint64_t, ff_ph_03) = { 0x0003000300030003ULL }; + + __asm__ volatile ( + MMI_LI(%[tmp0], 0x03) + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + "mtc1 %[tmp0], %[ftmp11] \n\t" + "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" + "gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t" + "gsldrc1 %[ftmp2], 0x08(%[ip]) \n\t" + "gsldlc1 %[ftmp3], 0x17(%[ip]) \n\t" + "gsldrc1 %[ftmp3], 0x10(%[ip]) \n\t" + "gsldlc1 %[ftmp4], 0x1f(%[ip]) \n\t" + "gsldrc1 %[ftmp4], 0x18(%[ip]) \n\t" + "paddh %[ftmp5], %[ftmp1], %[ftmp2] \n\t" + "psubh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" + "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t" + "psubh %[ftmp8], %[ftmp3], %[ftmp4] \n\t" + + "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" + "psubh %[ftmp2], %[ftmp5], %[ftmp7] \n\t" + "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t" + "paddh %[ftmp4], %[ftmp6], %[ftmp8] \n\t" + + TRANSPOSE_4H + // a + "paddh %[ftmp5], %[ftmp1], %[ftmp4] \n\t" + // d + "psubh %[ftmp6], %[ftmp1], %[ftmp4] \n\t" + // b + "paddh %[ftmp7], %[ftmp2], %[ftmp3] \n\t" + // c + "psubh %[ftmp8], %[ftmp2], %[ftmp3] \n\t" + + "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" + "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t" + "psubh %[ftmp3], %[ftmp5], %[ftmp7] \n\t" + "psubh %[ftmp4], %[ftmp6], %[ftmp8] \n\t" + + "paddh %[ftmp1], %[ftmp1], %[ff_ph_03] \n\t" + "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t" + "paddh %[ftmp2], %[ftmp2], %[ff_ph_03] \n\t" + "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t" + "paddh %[ftmp3], %[ftmp3], %[ff_ph_03] \n\t" + "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t" + "paddh %[ftmp4], %[ftmp4], %[ff_ph_03] \n\t" + "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t" + + TRANSPOSE_4H + "gssdlc1 %[ftmp1], 0x07(%[op]) \n\t" + "gssdrc1 %[ftmp1], 0x00(%[op]) \n\t" + "gssdlc1 %[ftmp2], 0x0f(%[op]) \n\t" + "gssdrc1 %[ftmp2], 0x08(%[op]) \n\t" + "gssdlc1 %[ftmp3], 0x17(%[op]) \n\t" + "gssdrc1 %[ftmp3], 0x10(%[op]) \n\t" + "gssdlc1 %[ftmp4], 0x1f(%[op]) \n\t" + "gssdrc1 %[ftmp4], 0x18(%[op]) \n\t" + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), + [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), + [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), + [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), + [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]) + : [ip]"r"(input), [op]"r"(output), [ff_ph_03]"f"(ff_ph_03) + : "memory" + ); + + for (i = 0; i < 16; i++) { + mb_dqcoeff[i * 16] = output[i]; + } +} diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl index 30cfad5e0..76721524d 100644 --- a/vp8/common/rtcd_defs.pl +++ b/vp8/common/rtcd_defs.pl @@ -86,7 +86,7 @@ $vp8_loop_filter_simple_bh_mmi=vp8_loop_filter_bhs_mmi; # #idct16 add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"; -specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa/; +specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa mmi/; #iwalsh1 add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output"; @@ -94,11 +94,11 @@ specialize qw/vp8_short_inv_walsh4x4_1 dspr2/; #iwalsh16 add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output"; -specialize qw/vp8_short_inv_walsh4x4 sse2 neon dspr2 msa/; +specialize qw/vp8_short_inv_walsh4x4 sse2 neon dspr2 msa mmi/; #idct1_scalar_add add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"; -specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa/; +specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa mmi/; # # RECON diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 7fa51c60e..1dd12158c 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -119,6 +119,7 @@ VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp8_macros_msa.h # common (c) VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/sixtap_filter_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/loopfilter_filters_mmi.c +VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/idctllm_mmi.c ifeq ($(CONFIG_POSTPROC),yes) VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c