From: jinbo <jinbo-hf@loongson.cn>
Date: Wed, 24 Jun 2020 09:10:20 +0000 (+0800)
Subject: vp8,vpx_dsp:[loongson] fix specification of instruction name
X-Git-Tag: v1.10.0-rc1~107
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c039b5442b342cf0d99ce3cfaf596fd1c26a7208;p=libvpx

vp8,vpx_dsp:[loongson] fix specification of instruction name

1. Rename 'xor,or,and' to 'pxor,por,pand'. When the operands are FPRs,
   gcc accepts both spellings, but clang only accepts the second.
2. Rename 'dsrl,srl' to 'ssrld,ssrlw'. When the operands are FPRs,
   gcc accepts both spellings, but clang only accepts the second.

Change-Id: I93b47348e7c6580d99f57dc11165b4645236533c
---

diff --git a/vp8/common/mips/mmi/dequantize_mmi.c b/vp8/common/mips/mmi/dequantize_mmi.c
index b3f8084ae..b9330a666 100644
--- a/vp8/common/mips/mmi/dequantize_mmi.c
+++ b/vp8/common/mips/mmi/dequantize_mmi.c
@@ -100,7 +100,7 @@ void vp8_dequant_idct_add_mmi(int16_t *input, int16_t *dq, unsigned char *dest,
   vp8_short_idct4x4llm_mmi(input, dest, stride, dest, stride);
 
   __asm__ volatile(
-      "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
+      "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
       "gssdlc1    %[ftmp0],   0x07(%[input])                  \n\t"
       "gssdrc1    %[ftmp0],   0x00(%[input])                  \n\t"
       "sdl        $0,         0x0f(%[input])                  \n\t"
diff --git a/vp8/common/mips/mmi/idctllm_mmi.c b/vp8/common/mips/mmi/idctllm_mmi.c
index 5e48f5916..4fad1d347 100644
--- a/vp8/common/mips/mmi/idctllm_mmi.c
+++ b/vp8/common/mips/mmi/idctllm_mmi.c
@@ -13,25 +13,25 @@
 #include "vpx_ports/asmdefs_mmi.h"
 
 #define TRANSPOSE_4H \
-  "xor           %[ftmp0],    %[ftmp0],    %[ftmp0]          \n\t" \
+  "pxor          %[ftmp0],    %[ftmp0],    %[ftmp0]          \n\t" \
   MMI_LI(%[tmp0], 0x93)                                            \
   "mtc1          %[tmp0],     %[ftmp10]                      \n\t" \
   "punpcklhw     %[ftmp5],    %[ftmp1],    %[ftmp0]          \n\t" \
   "punpcklhw     %[ftmp9],    %[ftmp2],    %[ftmp0]          \n\t" \
   "pshufh        %[ftmp9],    %[ftmp9],    %[ftmp10]         \n\t" \
-  "or            %[ftmp5],    %[ftmp5],    %[ftmp9]          \n\t" \
+  "por           %[ftmp5],    %[ftmp5],    %[ftmp9]          \n\t" \
   "punpckhhw     %[ftmp6],    %[ftmp1],    %[ftmp0]          \n\t" \
   "punpckhhw     %[ftmp9],    %[ftmp2],    %[ftmp0]          \n\t" \
   "pshufh        %[ftmp9],    %[ftmp9],    %[ftmp10]         \n\t" \
-  "or            %[ftmp6],    %[ftmp6],    %[ftmp9]          \n\t" \
+  "por           %[ftmp6],    %[ftmp6],    %[ftmp9]          \n\t" \
   "punpcklhw     %[ftmp7],    %[ftmp3],    %[ftmp0]          \n\t" \
   "punpcklhw     %[ftmp9],    %[ftmp4],    %[ftmp0]          \n\t" \
   "pshufh        %[ftmp9],    %[ftmp9],    %[ftmp10]         \n\t" \
-  "or            %[ftmp7],    %[ftmp7],    %[ftmp9]          \n\t" \
+  "por           %[ftmp7],    %[ftmp7],    %[ftmp9]          \n\t" \
   "punpckhhw     %[ftmp8],    %[ftmp3],    %[ftmp0]          \n\t" \
   "punpckhhw     %[ftmp9],    %[ftmp4],    %[ftmp0]          \n\t" \
   "pshufh        %[ftmp9],    %[ftmp9],    %[ftmp10]         \n\t" \
-  "or            %[ftmp8],    %[ftmp8],    %[ftmp9]          \n\t" \
+  "por           %[ftmp8],    %[ftmp8],    %[ftmp9]          \n\t" \
   "punpcklwd     %[ftmp1],    %[ftmp5],    %[ftmp7]          \n\t" \
   "punpckhwd     %[ftmp2],    %[ftmp5],    %[ftmp7]          \n\t" \
   "punpcklwd     %[ftmp3],    %[ftmp6],    %[ftmp8]          \n\t" \
@@ -49,7 +49,7 @@ void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
   __asm__ volatile (
     MMI_LI(%[tmp0], 0x02)
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
 
     "gsldlc1    %[ftmp1],   0x07(%[ip])                         \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[ip])                         \n\t"
@@ -203,7 +203,7 @@ void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
   int low32;
 
   __asm__ volatile (
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
     "pshufh     %[a1],      %[a1],          %[ftmp0]        \n\t"
     "ulw        %[low32],   0x00(%[pred_ptr])               \n\t"
     "mtc1       %[low32],   %[ftmp1]                        \n\t"
@@ -260,7 +260,7 @@ void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) {
 
   __asm__ volatile (
     MMI_LI(%[tmp0], 0x03)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
     "gsldlc1    %[ftmp1],   0x07(%[ip])                         \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[ip])                         \n\t"
diff --git a/vp8/common/mips/mmi/loopfilter_filters_mmi.c b/vp8/common/mips/mmi/loopfilter_filters_mmi.c
index f2182f95c..fc1240cc2 100644
--- a/vp8/common/mips/mmi/loopfilter_filters_mmi.c
+++ b/vp8/common/mips/mmi/loopfilter_filters_mmi.c
@@ -56,14 +56,14 @@ void vp8_loop_filter_horizontal_edge_mmi(
     "gsldrc1    %[ftmp4],   0x00(%[addr1])                          \n\t"
     "pasubub    %[ftmp1],   %[ftmp3],           %[ftmp4]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp10]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
 
     MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
     "gsldlc1    %[ftmp5],   0x07(%[addr1])                          \n\t"
     "gsldrc1    %[ftmp5],   0x00(%[addr1])                          \n\t"
     "pasubub    %[ftmp9],   %[ftmp4],           %[ftmp5]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp9],           %[ftmp10]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
 
     "gsldlc1    %[ftmp6],   0x07(%[src_ptr])                        \n\t"
     "gsldrc1    %[ftmp6],   0x00(%[src_ptr])                        \n\t"
@@ -72,21 +72,21 @@ void vp8_loop_filter_horizontal_edge_mmi(
     "gsldrc1    %[ftmp7],   0x00(%[addr0])                          \n\t"
     "pasubub    %[ftmp11],  %[ftmp7],           %[ftmp6]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp11],          %[ftmp10]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
 
     MMI_ADDU(%[addr1], %[src_ptr], %[src_pixel_step_x2])
     "gsldlc1    %[ftmp8],   0x07(%[addr1])                          \n\t"
     "gsldrc1    %[ftmp8],   0x00(%[addr1])                          \n\t"
     "pasubub    %[ftmp1],   %[ftmp8],           %[ftmp7]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp10]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
 
     MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step_x2])
     "gsldlc1    %[ftmp2],   0x07(%[addr1])                          \n\t"
     "gsldrc1    %[ftmp2],   0x00(%[addr1])                          \n\t"
     "pasubub    %[ftmp1],   %[ftmp2],           %[ftmp8]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp10]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
 
     "pasubub    %[ftmp1],   %[ftmp5],           %[ftmp6]            \n\t"
     "paddusb    %[ftmp1],   %[ftmp1],           %[ftmp1]            \n\t"
@@ -99,8 +99,8 @@ void vp8_loop_filter_horizontal_edge_mmi(
     "gsldlc1    %[ftmp10],  0x07(%[blimit])                         \n\t"
     "gsldrc1    %[ftmp10],  0x00(%[blimit])                         \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp10]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
-    "xor        %[ftmp10],  %[ftmp10],          %[ftmp10]           \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],          %[ftmp10]           \n\t"
     "pcmpeqb    %[ftmp0],   %[ftmp0],           %[ftmp10]           \n\t"
 
     "gsldlc1    %[ftmp10],  0x07(%[thresh])                         \n\t"
@@ -108,29 +108,29 @@ void vp8_loop_filter_horizontal_edge_mmi(
     "psubusb    %[ftmp1],   %[ftmp9],           %[ftmp10]           \n\t"
     "psubusb    %[ftmp2],   %[ftmp11],          %[ftmp10]           \n\t"
     "paddb      %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
-    "xor        %[ftmp2],   %[ftmp2],           %[ftmp2]            \n\t"
+    "pxor       %[ftmp2],   %[ftmp2],           %[ftmp2]            \n\t"
     "pcmpeqb    %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
     "pcmpeqb    %[ftmp2],   %[ftmp2],           %[ftmp2]            \n\t"
-    "xor        %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
+    "pxor       %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
 
-    "xor        %[ftmp4],   %[ftmp4],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp4],   %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
 
     "psubsb     %[ftmp2],   %[ftmp4],           %[ftmp7]            \n\t"
-    "and        %[ftmp2],   %[ftmp2],           %[ftmp1]            \n\t"
+    "pand       %[ftmp2],   %[ftmp2],           %[ftmp1]            \n\t"
     "psubsb     %[ftmp3],   %[ftmp6],           %[ftmp5]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp3]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp3]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp3]            \n\t"
-    "and        %[ftmp2],   %[ftmp2],           %[ftmp0]            \n\t"
+    "pand       %[ftmp2],   %[ftmp2],           %[ftmp0]            \n\t"
 
     "paddsb     %[ftmp8],   %[ftmp2],           %[ff_pb_03]         \n\t"
     "paddsb     %[ftmp9],   %[ftmp2],           %[ff_pb_04]         \n\t"
 
-    "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]            \n\t"
-    "xor        %[ftmp11],  %[ftmp11],          %[ftmp11]           \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]            \n\t"
+    "pxor       %[ftmp11],  %[ftmp11],          %[ftmp11]           \n\t"
     "punpcklbh  %[ftmp0],   %[ftmp0],           %[ftmp8]            \n\t"
     "punpckhbh  %[ftmp11],  %[ftmp11],          %[ftmp8]            \n\t"
 
@@ -139,10 +139,10 @@ void vp8_loop_filter_horizontal_edge_mmi(
     "psrah      %[ftmp0],   %[ftmp0],           %[ftmp10]           \n\t"
     "psrah      %[ftmp11],  %[ftmp11],          %[ftmp10]           \n\t"
     "packsshb   %[ftmp8],   %[ftmp0],           %[ftmp11]           \n\t"
-    "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]            \n\t"
     "punpcklbh  %[ftmp0],   %[ftmp0],           %[ftmp9]            \n\t"
     "psrah      %[ftmp0],   %[ftmp0],           %[ftmp10]           \n\t"
-    "xor        %[ftmp11],  %[ftmp11],          %[ftmp11]           \n\t"
+    "pxor       %[ftmp11],  %[ftmp11],          %[ftmp11]           \n\t"
     "punpckhbh  %[ftmp9],   %[ftmp11],          %[ftmp9]            \n\t"
     "psrah      %[ftmp9],   %[ftmp9],           %[ftmp10]           \n\t"
     "paddsh     %[ftmp11],  %[ftmp0],           %[ff_ph_01]         \n\t"
@@ -156,24 +156,24 @@ void vp8_loop_filter_horizontal_edge_mmi(
     "packsshb   %[ftmp11],  %[ftmp11],          %[ftmp9]            \n\t"
     "pandn      %[ftmp1],   %[ftmp1],           %[ftmp11]           \n\t"
     "paddsb     %[ftmp5],   %[ftmp5],           %[ftmp8]            \n\t"
-    "xor        %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
 
     MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
     "gssdlc1    %[ftmp5],   0x07(%[addr1])                          \n\t"
     "gssdrc1    %[ftmp5],   0x00(%[addr1])                          \n\t"
     MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x2])
     "paddsb     %[ftmp4],   %[ftmp4],           %[ftmp1]            \n\t"
-    "xor        %[ftmp4],   %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp4],   %[ftmp4],           %[ff_pb_80]         \n\t"
     "gssdlc1    %[ftmp4],   0x07(%[addr1])                          \n\t"
     "gssdrc1    %[ftmp4],   0x00(%[addr1])                          \n\t"
 
     "psubsb     %[ftmp6],   %[ftmp6],           %[ftmp0]            \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
     "gssdlc1    %[ftmp6],   0x07(%[src_ptr])                        \n\t"
     "gssdrc1    %[ftmp6],   0x00(%[src_ptr])                        \n\t"
 
     "psubsb     %[ftmp7],   %[ftmp7],           %[ftmp1]            \n\t"
-    "xor        %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
     "gssdlc1    %[ftmp7],   0x07(%[addr0])                          \n\t"
     "gssdrc1    %[ftmp7],   0x00(%[addr0])                          \n\t"
 
@@ -288,23 +288,23 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
     /* abs (q2-q1) */
     "pasubub    %[ftmp7],   %[ftmp11],          %[ftmp10]           \n\t"
     "psubusb    %[ftmp7],   %[ftmp7],           %[ftmp8]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     /* ftmp3: abs(q1-q0) */
     "pasubub    %[ftmp3],   %[ftmp10],          %[ftmp9]            \n\t"
     "psubusb    %[ftmp7],   %[ftmp3],           %[ftmp8]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     /* ftmp4: abs(p1-p0) */
     "pasubub    %[ftmp4],   %[ftmp5],           %[ftmp6]            \n\t"
     "psubusb    %[ftmp7],   %[ftmp4],           %[ftmp8]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     /* abs (p2-p1) */
     "pasubub    %[ftmp7],   %[ftmp2],           %[ftmp5]            \n\t"
     "psubusb    %[ftmp7],   %[ftmp7],           %[ftmp8]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     /* abs (p3-p2) */
     "pasubub    %[ftmp7],   %[ftmp1],           %[ftmp2]            \n\t"
     "psubusb    %[ftmp7],   %[ftmp7],           %[ftmp8]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
 
     "gsldlc1    %[ftmp8],   0x07(%[blimit])                         \n\t"
     "gsldrc1    %[ftmp8],   0x00(%[blimit])                         \n\t"
@@ -314,14 +314,14 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
     "paddusb    %[ftmp11],  %[ftmp11],          %[ftmp11]           \n\t"
     /* abs (p1-q1) */
     "pasubub    %[ftmp12],  %[ftmp10],          %[ftmp5]            \n\t"
-    "and        %[ftmp12],  %[ftmp12],          %[ff_pb_fe]         \n\t"
+    "pand       %[ftmp12],  %[ftmp12],          %[ff_pb_fe]         \n\t"
     "li         %[tmp0],    0x01                                    \n\t"
     "mtc1       %[tmp0],    %[ftmp1]                                \n\t"
     "psrlh      %[ftmp12],  %[ftmp12],          %[ftmp1]            \n\t"
     "paddusb    %[ftmp1],   %[ftmp11],          %[ftmp12]           \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp8]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
-    "xor        %[ftmp1],   %[ftmp1],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "pxor       %[ftmp1],   %[ftmp1],           %[ftmp1]            \n\t"
     /* ftmp0:mask */
     "pcmpeqb    %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
 
@@ -331,41 +331,41 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
     /* ftmp3: abs(q1-q0)  ftmp4: abs(p1-p0) */
     "psubusb    %[ftmp4],   %[ftmp4],           %[ftmp8]            \n\t"
     "psubusb    %[ftmp3],   %[ftmp3],           %[ftmp8]            \n\t"
-    "or         %[ftmp2],   %[ftmp4],           %[ftmp3]            \n\t"
+    "por        %[ftmp2],   %[ftmp4],           %[ftmp3]            \n\t"
     "pcmpeqb    %[ftmp2],   %[ftmp2],           %[ftmp1]            \n\t"
     "pcmpeqb    %[ftmp1],   %[ftmp1],           %[ftmp1]            \n\t"
     /* ftmp1:hev */
-    "xor        %[ftmp1],   %[ftmp2],           %[ftmp1]            \n\t"
+    "pxor       %[ftmp1],   %[ftmp2],           %[ftmp1]            \n\t"
 
-    "xor        %[ftmp10],  %[ftmp10],          %[ff_pb_80]         \n\t"
-    "xor        %[ftmp9],   %[ftmp9],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],          %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp9],   %[ftmp9],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
 
     "psubsb     %[ftmp2],   %[ftmp5],           %[ftmp10]           \n\t"
-    "and        %[ftmp2],   %[ftmp2],           %[ftmp1]            \n\t"
+    "pand       %[ftmp2],   %[ftmp2],           %[ftmp1]            \n\t"
     "psubsb     %[ftmp3],   %[ftmp9],           %[ftmp6]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp3]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp3]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp3]            \n\t"
     /* ftmp2:filter_value */
-    "and        %[ftmp2],   %[ftmp2],           %[ftmp0]            \n\t"
+    "pand       %[ftmp2],   %[ftmp2],           %[ftmp0]            \n\t"
 
     "paddsb     %[ftmp11],  %[ftmp2],           %[ff_pb_04]         \n\t"
     "paddsb     %[ftmp12],  %[ftmp2],           %[ff_pb_03]         \n\t"
 
     "li         %[tmp0],    0x0b                                    \n\t"
     "mtc1       %[tmp0],    %[ftmp7]                                \n\t"
-    "xor       %[ftmp0],    %[ftmp0],           %[ftmp0]            \n\t"
-    "xor       %[ftmp8],    %[ftmp8],           %[ftmp8]            \n\t"
+    "pxor      %[ftmp0],    %[ftmp0],           %[ftmp0]            \n\t"
+    "pxor      %[ftmp8],    %[ftmp8],           %[ftmp8]            \n\t"
     "punpcklbh %[ftmp0],    %[ftmp0],           %[ftmp12]           \n\t"
     "punpckhbh %[ftmp8],    %[ftmp8],           %[ftmp12]           \n\t"
     "psrah     %[ftmp0],    %[ftmp0],           %[ftmp7]            \n\t"
     "psrah     %[ftmp8],    %[ftmp8],           %[ftmp7]            \n\t"
     "packsshb  %[ftmp12],   %[ftmp0],           %[ftmp8]            \n\t"
 
-    "xor       %[ftmp0],    %[ftmp0],           %[ftmp0]            \n\t"
-    "xor       %[ftmp8],    %[ftmp8],           %[ftmp8]            \n\t"
+    "pxor      %[ftmp0],    %[ftmp0],           %[ftmp0]            \n\t"
+    "pxor      %[ftmp8],    %[ftmp8],           %[ftmp8]            \n\t"
     "punpcklbh %[ftmp0],    %[ftmp0],           %[ftmp11]           \n\t"
     "punpckhbh %[ftmp8],    %[ftmp8],           %[ftmp11]           \n\t"
     "psrah     %[ftmp0],    %[ftmp0],           %[ftmp7]            \n\t"
@@ -373,9 +373,9 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
     "packsshb  %[ftmp11],   %[ftmp0],           %[ftmp8]            \n\t"
 
     "psubsb     %[ftmp9],   %[ftmp9],           %[ftmp11]           \n\t"
-    "xor        %[ftmp9],   %[ftmp9],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp9],   %[ftmp9],           %[ff_pb_80]         \n\t"
     "paddsb     %[ftmp6],   %[ftmp6],           %[ftmp12]           \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
     "paddsh     %[ftmp0],   %[ftmp0],           %[ff_ph_01]         \n\t"
     "paddsh     %[ftmp8],   %[ftmp8],           %[ff_ph_01]         \n\t"
 
@@ -386,9 +386,9 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
     "packsshb   %[ftmp2],   %[ftmp0],           %[ftmp8]            \n\t"
     "pandn      %[ftmp2],   %[ftmp1],           %[ftmp2]            \n\t"
     "psubsb     %[ftmp10],  %[ftmp10],          %[ftmp2]            \n\t"
-    "xor        %[ftmp10],  %[ftmp10],          %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],          %[ff_pb_80]         \n\t"
     "paddsb     %[ftmp5],   %[ftmp5],           %[ftmp2]            \n\t"
-    "xor        %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
 
     /* ftmp5: *op1 ; ftmp6: *op0 */
     "punpcklbh  %[ftmp2],   %[ftmp5],           %[ftmp6]            \n\t"
@@ -408,7 +408,7 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
 
     "li         %[tmp0],    0x20                                    \n\t"
     "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
-    "dsrl       %[ftmp2],   %[ftmp2],           %[ftmp9]            \n\t"
+    "ssrld      %[ftmp2],   %[ftmp2],           %[ftmp9]            \n\t"
     MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
     MMI_SUBU(%[addr1], %[addr0], %[tmp0])
     "gsswlc1    %[ftmp2],   0x05(%[addr1])                          \n\t"
@@ -419,21 +419,21 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
     "gsswlc1    %[ftmp6],   0x05(%[addr1])                          \n\t"
     "gsswrc1    %[ftmp6],   0x02(%[addr1])                          \n\t"
 
-    "dsrl       %[ftmp6],   %[ftmp6],           %[ftmp9]            \n\t"
+    "ssrld      %[ftmp6],   %[ftmp6],           %[ftmp9]            \n\t"
     MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
     "gsswlc1    %[ftmp6],   0x05(%[addr1])                          \n\t"
     "gsswrc1    %[ftmp6],   0x02(%[addr1])                          \n\t"
     "gsswlc1    %[ftmp1],   0x05(%[src_ptr])                        \n\t"
     "gsswrc1    %[ftmp1],   0x02(%[src_ptr])                        \n\t"
 
-    "dsrl       %[ftmp1],   %[ftmp1],           %[ftmp9]            \n\t"
+    "ssrld      %[ftmp1],   %[ftmp1],           %[ftmp9]            \n\t"
     "gsswlc1    %[ftmp1],   0x05(%[addr0])                          \n\t"
     "gsswrc1    %[ftmp1],   0x02(%[addr0])                          \n\t"
     MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
     "gsswlc1    %[ftmp5],   0x05(%[addr1])                          \n\t"
     "gsswrc1    %[ftmp5],   0x02(%[addr1])                          \n\t"
 
-    "dsrl       %[ftmp5],   %[ftmp5],           %[ftmp9]            \n\t"
+    "ssrld      %[ftmp5],   %[ftmp5],           %[ftmp9]            \n\t"
     MMI_ADDU(%[addr1], %[addr0], %[tmp0])
     "gsswlc1    %[ftmp5],   0x05(%[addr1])                          \n\t"
     "gsswrc1    %[ftmp5],   0x02(%[addr1])                          \n\t"
@@ -532,31 +532,31 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
     "psubusb    %[ftmp0],   %[ftmp0],           %[ftmp9]            \n\t"
     "pasubub    %[ftmp1],   %[ftmp3],           %[ftmp4]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp9]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
     "pasubub    %[ftmp10],  %[ftmp4],           %[ftmp5]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp10],          %[ftmp9]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
     "pasubub    %[ftmp11],  %[ftmp7],           %[ftmp6]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp11],          %[ftmp9]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
     "pasubub    %[ftmp1],   %[ftmp8],           %[ftmp7]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp9]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
     "pasubub    %[ftmp1],   %[ftmp2],           %[ftmp8]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp9]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
 
     "pasubub    %[ftmp1],   %[ftmp5],           %[ftmp6]            \n\t"
     "paddusb    %[ftmp1],   %[ftmp1],           %[ftmp1]            \n\t"
     "pasubub    %[ftmp2],   %[ftmp4],           %[ftmp7]            \n\t"
-    "and        %[ftmp2],   %[ftmp2],           %[ff_pb_fe]         \n\t"
+    "pand       %[ftmp2],   %[ftmp2],           %[ff_pb_fe]         \n\t"
     "li         %[tmp0],    0x01                                    \n\t"
     "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
     "psrlh      %[ftmp2],   %[ftmp2],           %[ftmp9]            \n\t"
     "paddusb    %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
     "psubusb    %[ftmp1],   %[ftmp1],           %[ftmp12]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
-    "xor        %[ftmp9],   %[ftmp9],           %[ftmp9]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+    "pxor       %[ftmp9],   %[ftmp9],           %[ftmp9]            \n\t"
     /* ftmp0: mask */
     "pcmpeqb    %[ftmp0],   %[ftmp0],           %[ftmp9]            \n\t"
 
@@ -565,24 +565,24 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
     "psubusb    %[ftmp1],   %[ftmp10],          %[ftmp9]            \n\t"
     "psubusb    %[ftmp2],   %[ftmp11],          %[ftmp9]            \n\t"
     "paddb      %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
-    "xor        %[ftmp2],   %[ftmp2],           %[ftmp2]            \n\t"
+    "pxor       %[ftmp2],   %[ftmp2],           %[ftmp2]            \n\t"
     "pcmpeqb    %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
     "pcmpeqb    %[ftmp2],   %[ftmp2],           %[ftmp2]            \n\t"
     /* ftmp1: hev */
-    "xor        %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
+    "pxor       %[ftmp1],   %[ftmp1],           %[ftmp2]            \n\t"
 
-    "xor        %[ftmp4],   %[ftmp4],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp4],   %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
     "psubsb     %[ftmp2],   %[ftmp4],           %[ftmp7]            \n\t"
     "psubsb     %[ftmp9],   %[ftmp6],           %[ftmp5]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp9]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp9]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp9]            \n\t"
-    "and        %[ftmp2],   %[ftmp2],           %[ftmp0]            \n\t"
+    "pand       %[ftmp2],   %[ftmp2],           %[ftmp0]            \n\t"
     "pandn      %[ftmp12],  %[ftmp1],           %[ftmp2]            \n\t"
-    "and        %[ftmp2],   %[ftmp2],           %[ftmp1]            \n\t"
+    "pand       %[ftmp2],   %[ftmp2],           %[ftmp1]            \n\t"
 
     "li         %[tmp0],    0x0b                                    \n\t"
     "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
@@ -595,13 +595,13 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
 
     "li         %[tmp0],    0x07                                    \n\t"
     "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
-    "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]            \n\t"
 
     VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1b00])
     "psubsb     %[ftmp6],   %[ftmp6],           %[ftmp1]            \n\t"
     "paddsb     %[ftmp5],   %[ftmp5],           %[ftmp1]            \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
     MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
     MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
     "gssdlc1    %[ftmp5],   0x07(%[src_ptr])                        \n\t"
@@ -613,8 +613,8 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
     VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1200])
     "paddsb     %[ftmp4],   %[ftmp4],           %[ftmp1]            \n\t"
     "psubsb     %[ftmp7],   %[ftmp7],           %[ftmp1]            \n\t"
-    "xor        %[ftmp4],   %[ftmp4],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp4],   %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
     "gssdlc1    %[ftmp7],   0x07(%[src_ptr])                        \n\t"
     "gssdrc1    %[ftmp7],   0x00(%[src_ptr])                        \n\t"
@@ -624,12 +624,12 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
     "gssdrc1    %[ftmp4],   0x00(%[src_ptr])                        \n\t"
 
     VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_0900])
-    "xor        %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp8],   %[ftmp8],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp8],   %[ftmp8],           %[ff_pb_80]         \n\t"
     "paddsb     %[ftmp3],   %[ftmp3],           %[ftmp1]            \n\t"
     "psubsb     %[ftmp8],   %[ftmp8],           %[ftmp1]            \n\t"
-    "xor        %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp8],   %[ftmp8],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp8],   %[ftmp8],           %[ff_pb_80]         \n\t"
     MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
     "gssdlc1    %[ftmp8],   0x07(%[src_ptr])                        \n\t"
     "gssdrc1    %[ftmp8],   0x00(%[src_ptr])                        \n\t"
@@ -662,8 +662,8 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
 }
 
 #define VP8_MBLOOP_VPSRAB_ADDH                                          \
-  "xor        %[ftmp7],   %[ftmp7],           %[ftmp7]            \n\t" \
-  "xor        %[ftmp8],   %[ftmp8],           %[ftmp8]            \n\t" \
+  "pxor       %[ftmp7],   %[ftmp7],           %[ftmp7]            \n\t" \
+  "pxor       %[ftmp8],   %[ftmp8],           %[ftmp8]            \n\t" \
   "punpcklbh  %[ftmp7],   %[ftmp7],           %[ftmp0]            \n\t" \
   "punpckhbh  %[ftmp8],   %[ftmp8],           %[ftmp0]            \n\t"
 
@@ -755,23 +755,23 @@ void vp8_mbloop_filter_vertical_edge_mmi(
     /* abs (q2-q1) */
     "pasubub    %[ftmp7],   %[ftmp11],          %[ftmp10]           \n\t"
     "psubusb    %[ftmp7],   %[ftmp7],           %[ftmp13]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     /* ftmp3: abs(q1-q0) */
     "pasubub    %[ftmp3],   %[ftmp10],          %[ftmp9]            \n\t"
     "psubusb    %[ftmp7],   %[ftmp3],           %[ftmp13]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     /* ftmp4: abs(p1-p0) */
     "pasubub    %[ftmp4],   %[ftmp5],           %[ftmp6]            \n\t"
     "psubusb    %[ftmp7],   %[ftmp4],           %[ftmp13]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     /* abs (p2-p1) */
     "pasubub    %[ftmp7],   %[ftmp2],           %[ftmp5]            \n\t"
     "psubusb    %[ftmp7],   %[ftmp7],           %[ftmp13]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     /* abs (p3-p2) */
     "pasubub    %[ftmp7],   %[ftmp1],           %[ftmp2]            \n\t"
     "psubusb    %[ftmp7],   %[ftmp7],           %[ftmp13]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
 
     "gsldlc1    %[ftmp13],  0x07(%[blimit])                         \n\t"
     "gsldrc1    %[ftmp13],  0x00(%[blimit])                         \n\t"
@@ -782,14 +782,14 @@ void vp8_mbloop_filter_vertical_edge_mmi(
     "paddusb    %[ftmp1],   %[ftmp1],           %[ftmp1]            \n\t"
     /* abs (p1-q1) / 2 */
     "pasubub    %[ftmp12],  %[ftmp10],          %[ftmp5]            \n\t"
-    "and        %[ftmp12],  %[ftmp12],          %[ff_pb_fe]         \n\t"
+    "pand       %[ftmp12],  %[ftmp12],          %[ff_pb_fe]         \n\t"
     "li         %[tmp0],    0x01                                    \n\t"
     "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
     "psrlh      %[ftmp12],  %[ftmp12],          %[ftmp8]            \n\t"
     "paddusb    %[ftmp12],  %[ftmp1],           %[ftmp12]           \n\t"
     "psubusb    %[ftmp12],  %[ftmp12],          %[ftmp13]           \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp12]           \n\t"
-    "xor        %[ftmp12],  %[ftmp12],          %[ftmp12]           \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp12]           \n\t"
+    "pxor       %[ftmp12],  %[ftmp12],          %[ftmp12]           \n\t"
     /* ftmp0: mask */
     "pcmpeqb    %[ftmp0],   %[ftmp0],           %[ftmp12]           \n\t"
 
@@ -797,19 +797,19 @@ void vp8_mbloop_filter_vertical_edge_mmi(
     "psubusb    %[ftmp4],   %[ftmp4],           %[ftmp7]            \n\t"
     /* abs(q1-q0) - thresh */
     "psubusb    %[ftmp3],   %[ftmp3],           %[ftmp7]            \n\t"
-    "or         %[ftmp3],   %[ftmp4],           %[ftmp3]            \n\t"
+    "por        %[ftmp3],   %[ftmp4],           %[ftmp3]            \n\t"
     "pcmpeqb    %[ftmp3],   %[ftmp3],           %[ftmp12]           \n\t"
     "pcmpeqb    %[ftmp1],   %[ftmp1],           %[ftmp1]            \n\t"
     /* ftmp1: hev */
-    "xor        %[ftmp1],   %[ftmp3],           %[ftmp1]            \n\t"
+    "pxor       %[ftmp1],   %[ftmp3],           %[ftmp1]            \n\t"
 
     /* ftmp2:ps2, ftmp5:ps1, ftmp6:ps0, ftmp9:qs0, ftmp10:qs1, ftmp11:qs2 */
-    "xor        %[ftmp11],  %[ftmp11],          %[ff_pb_80]         \n\t"
-    "xor        %[ftmp10],  %[ftmp10],          %[ff_pb_80]         \n\t"
-    "xor        %[ftmp9],   %[ftmp9],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp2],   %[ftmp2],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp11],  %[ftmp11],          %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],          %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp9],   %[ftmp9],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp2],   %[ftmp2],           %[ff_pb_80]         \n\t"
 
     "psubsb     %[ftmp3],   %[ftmp5],           %[ftmp10]           \n\t"
     "psubsb     %[ftmp4],   %[ftmp9],           %[ftmp6]            \n\t"
@@ -817,9 +817,9 @@ void vp8_mbloop_filter_vertical_edge_mmi(
     "paddsb     %[ftmp3],   %[ftmp3],           %[ftmp4]            \n\t"
     "paddsb     %[ftmp3],   %[ftmp3],           %[ftmp4]            \n\t"
     /* filter_value &= mask */
-    "and        %[ftmp0],   %[ftmp0],           %[ftmp3]            \n\t"
+    "pand       %[ftmp0],   %[ftmp0],           %[ftmp3]            \n\t"
     /* Filter2 = filter_value & hev */
-    "and        %[ftmp3],   %[ftmp1],           %[ftmp0]            \n\t"
+    "pand       %[ftmp3],   %[ftmp1],           %[ftmp0]            \n\t"
     /* filter_value &= ~hev */
     "pandn      %[ftmp0],   %[ftmp1],           %[ftmp0]            \n\t"
 
@@ -852,10 +852,10 @@ void vp8_mbloop_filter_vertical_edge_mmi(
     VP8_MBLOOP_VPSRAB_ADDT
     "psubsb     %[ftmp4],   %[ftmp9],           %[ftmp3]            \n\t"
     /* ftmp9: oq0 */
-    "xor        %[ftmp9],   %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp9],   %[ftmp4],           %[ff_pb_80]         \n\t"
     "paddsb     %[ftmp4],   %[ftmp6],           %[ftmp3]            \n\t"
     /* ftmp6: op0 */
-    "xor        %[ftmp6],   %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp4],           %[ff_pb_80]         \n\t"
 
     VP8_MBLOOP_VPSRAB_ADDH
     "paddh      %[ftmp1],   %[ff_ph_0900],      %[ff_ph_0900]       \n\t"
@@ -864,10 +864,10 @@ void vp8_mbloop_filter_vertical_edge_mmi(
     VP8_MBLOOP_VPSRAB_ADDT
     "psubsb     %[ftmp4],   %[ftmp10],          %[ftmp3]            \n\t"
     /* ftmp10: oq1 */
-    "xor        %[ftmp10],   %[ftmp4],          %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp10],   %[ftmp4],          %[ff_pb_80]         \n\t"
     "paddsb     %[ftmp4],   %[ftmp5],           %[ftmp3]            \n\t"
     /* ftmp5: op1 */
-    "xor        %[ftmp5],   %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp5],   %[ftmp4],           %[ff_pb_80]         \n\t"
 
     VP8_MBLOOP_VPSRAB_ADDH
     "pmulhh     %[ftmp7],   %[ftmp7],           %[ff_ph_0900]       \n\t"
@@ -875,10 +875,10 @@ void vp8_mbloop_filter_vertical_edge_mmi(
     VP8_MBLOOP_VPSRAB_ADDT
     "psubsb     %[ftmp4],   %[ftmp11],          %[ftmp3]            \n\t"
     /* ftmp11: oq2 */
-    "xor        %[ftmp11],  %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp11],  %[ftmp4],           %[ff_pb_80]         \n\t"
     "paddsb     %[ftmp4],   %[ftmp2],           %[ftmp3]            \n\t"
     /* ftmp2: op2 */
-    "xor        %[ftmp2],   %[ftmp4],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp2],   %[ftmp4],           %[ff_pb_80]         \n\t"
 
     "ldc1       %[ftmp12],  0x00(%[srct])                           \n\t"
     "ldc1       %[ftmp8],   0x08(%[srct])                           \n\t"
@@ -965,7 +965,7 @@ void vp8_mbloop_filter_vertical_edge_mmi(
   "psrlh      %[ftmp0],   %[ftmp0],           %[ftmp8]            \n\t" \
   "psrah      %[ftmp1],   %[ftmp5],           %[ftmp10]           \n\t" \
   "psllh      %[ftmp1],   %[ftmp1],           %[ftmp8]            \n\t" \
-  "or         %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
+  "por        %[ftmp0],   %[ftmp0],           %[ftmp1]            \n\t"
 
 void vp8_loop_filter_simple_horizontal_edge_mmi(unsigned char *src_ptr,
                                                 int src_pixel_step,
@@ -1008,14 +1008,14 @@ void vp8_loop_filter_simple_horizontal_edge_mmi(unsigned char *src_ptr,
     "paddusb    %[ftmp5],   %[ftmp5],           %[ftmp5]            \n\t"
     "paddusb    %[ftmp5],   %[ftmp5],           %[ftmp1]            \n\t"
     "psubusb    %[ftmp5],   %[ftmp5],           %[ftmp3]            \n\t"
-    "xor        %[ftmp3],   %[ftmp3],           %[ftmp3]            \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],           %[ftmp3]            \n\t"
     "pcmpeqb    %[ftmp5],   %[ftmp5],           %[ftmp3]            \n\t"
 
-    "xor        %[ftmp2],   %[ftmp2],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp2],   %[ftmp2],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp7],   %[ftmp7],           %[ff_pb_80]         \n\t"
     "psubsb     %[ftmp2],   %[ftmp2],           %[ftmp7]            \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp3],   %[ftmp0],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp3],   %[ftmp0],           %[ff_pb_80]         \n\t"
     "psubsb     %[ftmp0],   %[ftmp3],           %[ftmp6]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp0]            \n\t"
     "paddsb     %[ftmp2],   %[ftmp2],           %[ftmp0]            \n\t"
@@ -1025,14 +1025,14 @@ void vp8_loop_filter_simple_horizontal_edge_mmi(unsigned char *src_ptr,
     "paddsb     %[ftmp5],   %[ftmp5],           %[ff_pb_04]         \n\t"
     VP8_SIMPLE_HPSRAB
     "psubsb     %[ftmp3],   %[ftmp3],           %[ftmp0]            \n\t"
-    "xor        %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
     "gssdlc1    %[ftmp3],   0x07(%[src_ptr])                        \n\t"
     "gssdrc1    %[ftmp3],   0x00(%[src_ptr])                        \n\t"
 
     "psubsb     %[ftmp5],   %[ftmp5],           %[ff_pb_01]         \n\t"
     VP8_SIMPLE_HPSRAB
     "paddsb     %[ftmp6],   %[ftmp6],           %[ftmp0]            \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
     MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
     "gssdlc1    %[ftmp6],   0x07(%[addr1])                          \n\t"
     "gssdrc1    %[ftmp6],   0x00(%[addr1])                          \n\t"
@@ -1121,7 +1121,7 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
     "li         %[tmp0],    0x01                                    \n\t"
     "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
     "pasubub    %[ftmp6],   %[ftmp3],           %[ftmp0]            \n\t"
-    "and        %[ftmp6],   %[ftmp6],           %[ff_pb_fe]         \n\t"
+    "pand       %[ftmp6],   %[ftmp6],           %[ff_pb_fe]         \n\t"
     "psrlh      %[ftmp6],   %[ftmp6],           %[ftmp9]            \n\t"
     "pasubub    %[ftmp5],   %[ftmp1],           %[ftmp2]            \n\t"
     "paddusb    %[ftmp5],   %[ftmp5],           %[ftmp5]            \n\t"
@@ -1130,23 +1130,23 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
     "gsldlc1    %[ftmp7],   0x07(%[blimit])                         \n\t"
     "gsldrc1    %[ftmp7],   0x00(%[blimit])                         \n\t"
     "psubusb    %[ftmp5],   %[ftmp5],           %[ftmp7]            \n\t"
-    "xor        %[ftmp7],   %[ftmp7],           %[ftmp7]            \n\t"
+    "pxor       %[ftmp7],   %[ftmp7],           %[ftmp7]            \n\t"
     "pcmpeqb    %[ftmp5],   %[ftmp5],           %[ftmp7]            \n\t"
 
     "sdc1       %[ftmp0],   0x00(%[srct])                           \n\t"
     "sdc1       %[ftmp3],   0x08(%[srct])                           \n\t"
 
-    "xor        %[ftmp0],   %[ftmp0],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
     "psubsb     %[ftmp0],   %[ftmp0],           %[ftmp3]            \n\t"
 
-    "xor        %[ftmp6],   %[ftmp1],           %[ff_pb_80]         \n\t"
-    "xor        %[ftmp3],   %[ftmp2],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp1],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp3],   %[ftmp2],           %[ff_pb_80]         \n\t"
     "psubsb     %[ftmp7],   %[ftmp3],           %[ftmp6]            \n\t"
     "paddsb     %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     "paddsb     %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     "paddsb     %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
-    "and        %[ftmp5],   %[ftmp5],           %[ftmp0]            \n\t"
+    "pand       %[ftmp5],   %[ftmp5],           %[ftmp0]            \n\t"
     "paddsb     %[ftmp5],   %[ftmp5],           %[ff_pb_04]         \n\t"
 
     "li         %[tmp0],    0x03                                    \n\t"
@@ -1159,9 +1159,9 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
     "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
     "psrah      %[ftmp7],   %[ftmp5],           %[ftmp9]            \n\t"
     "psllh      %[ftmp7],   %[ftmp7],           %[ftmp8]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp7]            \n\t"
     "psubsb     %[ftmp3],   %[ftmp3],           %[ftmp0]            \n\t"
-    "xor        %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],           %[ff_pb_80]         \n\t"
     "psubsb     %[ftmp5],   %[ftmp5],           %[ff_pb_01]         \n\t"
 
     "li         %[tmp0],    0x03                                    \n\t"
@@ -1174,9 +1174,9 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
     "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
     "psrah      %[ftmp5],   %[ftmp5],           %[ftmp9]            \n\t"
     "psllh      %[ftmp5],   %[ftmp5],           %[ftmp8]            \n\t"
-    "or         %[ftmp0],   %[ftmp0],           %[ftmp5]            \n\t"
+    "por        %[ftmp0],   %[ftmp0],           %[ftmp5]            \n\t"
     "paddsb     %[ftmp6],   %[ftmp6],           %[ftmp0]            \n\t"
-    "xor        %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],           %[ff_pb_80]         \n\t"
 
     "ldc1       %[ftmp0],   0x00(%[srct])                           \n\t"
     "ldc1       %[ftmp4],   0x08(%[srct])                           \n\t"
@@ -1195,7 +1195,7 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
     "punpckhhw  %[ftmp5],   %[ftmp1],           %[ftmp3]            \n\t"
     "punpcklhw  %[ftmp1],   %[ftmp1],           %[ftmp3]            \n\t"
 
-    "dsrl       %[ftmp0],   %[ftmp0],           %[ftmp10]           \n\t"
+    "ssrld      %[ftmp0],   %[ftmp0],           %[ftmp10]           \n\t"
     MMI_SUBU(%[addr1], %[addr0], %[src_pixel_step_x4])
     "gsswlc1    %[ftmp0],   0x03(%[addr1])                          \n\t"
     "gsswrc1    %[ftmp0],   0x00(%[addr1])                          \n\t"
@@ -1203,7 +1203,7 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
     "gsswlc1    %[ftmp6],   0x03(%[addr1])                          \n\t"
     "gsswrc1    %[ftmp6],   0x00(%[addr1])                          \n\t"
 
-    "dsrl       %[ftmp6],   %[ftmp6],           %[ftmp10]           \n\t"
+    "ssrld      %[ftmp6],   %[ftmp6],           %[ftmp10]           \n\t"
     "gsswlc1    %[ftmp1],   0x03(%[src_ptr])                        \n\t"
     "gsswrc1    %[ftmp1],   0x00(%[src_ptr])                        \n\t"
 
@@ -1215,11 +1215,11 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
     "gsswlc1    %[ftmp5],   0x03(%[addr1])                          \n\t"
     "gsswrc1    %[ftmp5],   0x00(%[addr1])                          \n\t"
 
-    "dsrl       %[ftmp1],   %[ftmp1],           %[ftmp10]           \n\t"
+    "ssrld      %[ftmp1],   %[ftmp1],           %[ftmp10]           \n\t"
     "gsswlc1    %[ftmp1],   0x03(%[addr0])                          \n\t"
     "gsswrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 
-    "dsrl       %[ftmp5],   %[ftmp5],           %[ftmp10]           \n\t"
+    "ssrld      %[ftmp5],   %[ftmp5],           %[ftmp10]           \n\t"
     MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step_x2])
     "gsswlc1    %[ftmp5],   0x03(%[addr1])                          \n\t"
     "gsswrc1    %[ftmp5],   0x00(%[addr1])                          \n\t"
diff --git a/vp8/common/mips/mmi/sixtap_filter_mmi.c b/vp8/common/mips/mmi/sixtap_filter_mmi.c
index 77d665d45..dbe35d09f 100644
--- a/vp8/common/mips/mmi/sixtap_filter_mmi.c
+++ b/vp8/common/mips/mmi/sixtap_filter_mmi.c
@@ -110,7 +110,7 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
     "ldc1       %[ftmp3],       0x30(%[vp8_filter])                   \n\t"
     "ldc1       %[ftmp4],       0x40(%[vp8_filter])                   \n\t"
     "ldc1       %[ftmp5],       0x50(%[vp8_filter])                   \n\t"
-    "xor        %[fzero],       %[fzero],           %[fzero]          \n\t"
+    "pxor       %[fzero],       %[fzero],           %[fzero]          \n\t"
     "li         %[tmp0],        0x07                                  \n\t"
     "mtc1       %[tmp0],        %[ftmp7]                              \n\t"
     "li         %[tmp0],        0x08                                  \n\t"
@@ -137,12 +137,12 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
     "pmullh     %[ftmp6],       %[ftmp6],          %[ftmp5]           \n\t"
     "paddsh     %[ftmp8],       %[ftmp8],          %[ftmp6]           \n\t"
 
-    "dsrl       %[ftmp10],      %[ftmp10],         %[ftmp11]          \n\t"
+    "ssrld      %[ftmp10],      %[ftmp10],         %[ftmp11]          \n\t"
     "punpcklbh  %[ftmp6],       %[ftmp10],         %[fzero]           \n\t"
     "pmullh     %[ftmp6],       %[ftmp6],          %[ftmp2]           \n\t"
     "paddsh     %[ftmp8],       %[ftmp8],          %[ftmp6]           \n\t"
 
-    "dsrl       %[ftmp10],      %[ftmp10],         %[ftmp11]          \n\t"
+    "ssrld      %[ftmp10],      %[ftmp10],         %[ftmp11]          \n\t"
     "punpcklbh  %[ftmp6],       %[ftmp10],         %[fzero]           \n\t"
     "pmullh     %[ftmp6],       %[ftmp6],          %[ftmp3]           \n\t"
     "paddsh     %[ftmp8],       %[ftmp8],          %[ftmp6]           \n\t"
@@ -222,7 +222,7 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
     "ldc1       %[ftmp3],     0x30(%[vp8_filter])                     \n\t"
     "ldc1       %[ftmp4],     0x40(%[vp8_filter])                     \n\t"
     "ldc1       %[ftmp5],     0x50(%[vp8_filter])                     \n\t"
-    "xor        %[fzero],     %[fzero],        %[fzero]               \n\t"
+    "pxor       %[fzero],     %[fzero],        %[fzero]               \n\t"
     "li         %[tmp0],      0x07                                    \n\t"
     "mtc1       %[tmp0],      %[ftmp13]                               \n\t"
 
@@ -314,7 +314,7 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
 #endif  // _MIPS_SIM == _ABIO32
 
   __asm__ volatile (
-    "xor        %[fzero],       %[fzero],           %[fzero]          \n\t"
+    "pxor       %[fzero],       %[fzero],           %[fzero]          \n\t"
 
     "1:                                                               \n\t"
     "gsldlc1    %[ftmp0],       0x07(%[src_ptr])                      \n\t"
@@ -351,7 +351,7 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
 #endif  // _MIPS_SIM == _ABIO32
 
   __asm__ volatile (
-    "xor        %[fzero],     %[fzero],        %[fzero]               \n\t"
+    "pxor       %[fzero],     %[fzero],        %[fzero]               \n\t"
 
     "1:                                                               \n\t"
     "gsldlc1    %[ftmp0],     0x07(%[src_ptr])                        \n\t"
diff --git a/vp8/encoder/mips/mmi/dct_mmi.c b/vp8/encoder/mips/mmi/dct_mmi.c
index 1f60a692d..b5ecf0f1c 100644
--- a/vp8/encoder/mips/mmi/dct_mmi.c
+++ b/vp8/encoder/mips/mmi/dct_mmi.c
@@ -24,19 +24,19 @@
   "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp0]         \n\t" \
   "punpcklhw  %[ftmp9],   %[ftmp2],   %[ftmp0]         \n\t" \
   "pshufh     %[ftmp9],   %[ftmp9],   %[ftmp10]        \n\t" \
-  "or         %[ftmp5],   %[ftmp5],   %[ftmp9]         \n\t" \
+  "por        %[ftmp5],   %[ftmp5],   %[ftmp9]         \n\t" \
   "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp0]         \n\t" \
   "punpckhhw  %[ftmp9],   %[ftmp2],   %[ftmp0]         \n\t" \
   "pshufh     %[ftmp9],   %[ftmp9],   %[ftmp10]        \n\t" \
-  "or         %[ftmp6],   %[ftmp6],   %[ftmp9]         \n\t" \
+  "por        %[ftmp6],   %[ftmp6],   %[ftmp9]         \n\t" \
   "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp0]         \n\t" \
   "punpcklhw  %[ftmp9],   %[ftmp4],   %[ftmp0]         \n\t" \
   "pshufh     %[ftmp9],   %[ftmp9],   %[ftmp10]        \n\t" \
-  "or         %[ftmp7],   %[ftmp7],   %[ftmp9]         \n\t" \
+  "por        %[ftmp7],   %[ftmp7],   %[ftmp9]         \n\t" \
   "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp0]         \n\t" \
   "punpckhhw  %[ftmp9],   %[ftmp4],   %[ftmp0]         \n\t" \
   "pshufh     %[ftmp9],   %[ftmp9],   %[ftmp10]        \n\t" \
-  "or         %[ftmp8],   %[ftmp8],   %[ftmp9]         \n\t" \
+  "por        %[ftmp8],   %[ftmp8],   %[ftmp9]         \n\t" \
   "punpcklwd  %[ftmp1],   %[ftmp5],   %[ftmp7]         \n\t" \
   "punpckhwd  %[ftmp2],   %[ftmp5],   %[ftmp7]         \n\t" \
   "punpcklwd  %[ftmp3],   %[ftmp6],   %[ftmp8]         \n\t" \
@@ -90,7 +90,7 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
   DECLARE_ALIGNED(8, const uint64_t, ff_ph_8) = { 0x0008000800080008ULL };
 
   __asm__ volatile (
-    "xor        %[ftmp0],   %[ftmp0],      %[ftmp0]         \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],      %[ftmp0]         \n\t"
     "gsldlc1    %[ftmp1],   0x07(%[ip])                     \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[ip])                     \n\t"
     MMI_ADDU(%[ip], %[ip], %[pitch])
@@ -237,7 +237,7 @@ void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
 
   __asm__ volatile (
     MMI_LI(%[tmp0], 0x02)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
 
     "gsldlc1    %[ftmp1],   0x07(%[ip])                         \n\t"
@@ -340,49 +340,49 @@ void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
 
     "pcmpgtw    %[ftmp9],   %[ftmp0],       %[ftmp1]            \n\t"
-    "and        %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
+    "pand       %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
     "paddw      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
     "paddw      %[ftmp1],   %[ftmp1],       %[ff_pw_03]         \n\t"
     "psraw      %[ftmp1],   %[ftmp1],       %[ftmp11]           \n\t"
 
     "pcmpgtw    %[ftmp9],   %[ftmp0],       %[ftmp2]            \n\t"
-    "and        %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
+    "pand       %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
     "paddw      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
     "paddw      %[ftmp2],   %[ftmp2],       %[ff_pw_03]         \n\t"
     "psraw      %[ftmp2],   %[ftmp2],       %[ftmp11]           \n\t"
 
     "pcmpgtw    %[ftmp9],   %[ftmp0],       %[ftmp3]            \n\t"
-    "and        %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
+    "pand       %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp9]            \n\t"
     "paddw      %[ftmp3],   %[ftmp3],       %[ff_pw_03]         \n\t"
     "psraw      %[ftmp3],   %[ftmp3],       %[ftmp11]           \n\t"
 
     "pcmpgtw    %[ftmp9],   %[ftmp0],       %[ftmp4]            \n\t"
-    "and        %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
+    "pand       %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
     "paddw      %[ftmp4],   %[ftmp4],       %[ftmp9]            \n\t"
     "paddw      %[ftmp4],   %[ftmp4],       %[ff_pw_03]         \n\t"
     "psraw      %[ftmp4],   %[ftmp4],       %[ftmp11]           \n\t"
 
     "pcmpgtw    %[ftmp9],   %[ftmp0],       %[ftmp5]            \n\t"
-    "and        %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
+    "pand       %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
     "paddw      %[ftmp5],   %[ftmp5],       %[ftmp9]            \n\t"
     "paddw      %[ftmp5],   %[ftmp5],       %[ff_pw_03]         \n\t"
     "psraw      %[ftmp5],   %[ftmp5],       %[ftmp11]           \n\t"
 
     "pcmpgtw    %[ftmp9],   %[ftmp0],       %[ftmp6]            \n\t"
-    "and        %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
+    "pand       %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
     "paddw      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
     "paddw      %[ftmp6],   %[ftmp6],       %[ff_pw_03]         \n\t"
     "psraw      %[ftmp6],   %[ftmp6],       %[ftmp11]           \n\t"
 
     "pcmpgtw    %[ftmp9],   %[ftmp0],       %[ftmp7]            \n\t"
-    "and        %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
+    "pand       %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
     "paddw      %[ftmp7],   %[ftmp7],       %[ftmp9]            \n\t"
     "paddw      %[ftmp7],   %[ftmp7],       %[ff_pw_03]         \n\t"
     "psraw      %[ftmp7],   %[ftmp7],       %[ftmp11]           \n\t"
 
     "pcmpgtw    %[ftmp9],   %[ftmp0],       %[ftmp8]            \n\t"
-    "and        %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
+    "pand       %[ftmp9],   %[ftmp9],       %[ff_pw_01]         \n\t"
     "paddw      %[ftmp8],   %[ftmp8],       %[ftmp9]            \n\t"
     "paddw      %[ftmp8],   %[ftmp8],       %[ff_pw_03]         \n\t"
     "psraw      %[ftmp8],   %[ftmp8],       %[ftmp11]           \n\t"
diff --git a/vp8/encoder/mips/mmi/vp8_quantize_mmi.c b/vp8/encoder/mips/mmi/vp8_quantize_mmi.c
index 3ccb196ff..69a9e5e01 100644
--- a/vp8/encoder/mips/mmi/vp8_quantize_mmi.c
+++ b/vp8/encoder/mips/mmi/vp8_quantize_mmi.c
@@ -47,7 +47,7 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
 
   __asm__ volatile(
       // loop 0 ~ 7
-      "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
+      "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
       "gsldlc1    %[ftmp1],   0x07(%[coeff_ptr])              \n\t"
       "gsldrc1    %[ftmp1],   0x00(%[coeff_ptr])              \n\t"
       "li         %[tmp0],    0x0f                            \n\t"
@@ -56,10 +56,10 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
       "gsldrc1    %[ftmp2],   0x08(%[coeff_ptr])              \n\t"
 
       "psrah      %[ftmp3],   %[ftmp1],       %[ftmp9]        \n\t"
-      "xor        %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
+      "pxor       %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
       "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]        \n\t"
       "psrah      %[ftmp4],   %[ftmp2],       %[ftmp9]        \n\t"
-      "xor        %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
+      "pxor       %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
       "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]        \n\t"
 
       "gsldlc1    %[ftmp5],   0x07(%[round_ptr])              \n\t"
@@ -75,8 +75,8 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
       "pmulhuh    %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
       "pmulhuh    %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
 
-      "xor        %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
-      "xor        %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
+      "pxor       %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
+      "pxor       %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
       "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]        \n\t"
       "psubh      %[ftmp8],   %[ftmp8],       %[ftmp4]        \n\t"
       "gssdlc1    %[ftmp7],   0x07(%[qcoeff_ptr])             \n\t"
@@ -90,10 +90,10 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
       "gsldrc1    %[ftmp2],   0x08(%[inv_zig_zag])            \n\t"
       "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp0]        \n\t"
       "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp0]        \n\t"
-      "xor        %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
-      "xor        %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
-      "and        %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
-      "and        %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
+      "pxor       %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
+      "pxor       %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
+      "pand       %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
+      "pand       %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
       "pmaxsh     %[ftmp10],  %[ftmp5],       %[ftmp6]        \n\t"
 
       "gsldlc1    %[ftmp5],   0x07(%[dequant_ptr])            \n\t"
@@ -114,10 +114,10 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
       "gsldrc1    %[ftmp2],   0x18(%[coeff_ptr])              \n\t"
 
       "psrah      %[ftmp3],   %[ftmp1],       %[ftmp9]        \n\t"
-      "xor        %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
+      "pxor       %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
       "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]        \n\t"
       "psrah      %[ftmp4],   %[ftmp2],       %[ftmp9]        \n\t"
-      "xor        %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
+      "pxor       %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
       "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]        \n\t"
 
       "gsldlc1    %[ftmp5],   0x17(%[round_ptr])              \n\t"
@@ -133,8 +133,8 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
       "pmulhuh    %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
       "pmulhuh    %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
 
-      "xor        %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
-      "xor        %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
+      "pxor       %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
+      "pxor       %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
       "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]        \n\t"
       "psubh      %[ftmp8],   %[ftmp8],       %[ftmp4]        \n\t"
       "gssdlc1    %[ftmp7],   0x17(%[qcoeff_ptr])             \n\t"
@@ -148,10 +148,10 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
       "gsldrc1    %[ftmp2],   0x18(%[inv_zig_zag])            \n\t"
       "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp0]        \n\t"
       "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp0]        \n\t"
-      "xor        %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
-      "xor        %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
-      "and        %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
-      "and        %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
+      "pxor       %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
+      "pxor       %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
+      "pand       %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
+      "pand       %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
       "pmaxsh     %[ftmp11],  %[ftmp5],       %[ftmp6]        \n\t"
 
       "gsldlc1    %[ftmp5],   0x17(%[dequant_ptr])            \n\t"
@@ -177,7 +177,7 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
       "pmaxsh     %[ftmp10],  %[ftmp10],       %[ftmp11]      \n\t"
       "li         %[tmp0],    0xffff                          \n\t"
       "mtc1       %[tmp0],    %[ftmp9]                        \n\t"
-      "and        %[ftmp10],  %[ftmp10],       %[ftmp9]       \n\t"
+      "pand       %[ftmp10],  %[ftmp10],       %[ftmp9]       \n\t"
       "gssdlc1    %[ftmp10],  0x07(%[eob])                    \n\t"
       "gssdrc1    %[ftmp10],  0x00(%[eob])                    \n\t"
       : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
@@ -217,7 +217,7 @@ void vp8_regular_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
   //  memset(dqcoeff_ptr, 0, 32);
   /* clang-format off */
   __asm__ volatile (
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
     "gssdlc1    %[ftmp0],   0x07(%[qcoeff_ptr])             \n\t"
     "gssdrc1    %[ftmp0],   0x00(%[qcoeff_ptr])             \n\t"
     "gssdlc1    %[ftmp0],   0x0f(%[qcoeff_ptr])             \n\t"
diff --git a/vpx_dsp/mips/sad_mmi.c b/vpx_dsp/mips/sad_mmi.c
index 4368db5fd..5dee3164b 100644
--- a/vpx_dsp/mips/sad_mmi.c
+++ b/vpx_dsp/mips/sad_mmi.c
@@ -365,7 +365,7 @@ static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_REF_ABS_SUB_64
@@ -407,7 +407,7 @@ static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_AVGREF_ABS_SUB_64
@@ -451,7 +451,7 @@ static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_REF_ABS_SUB_32
@@ -495,7 +495,7 @@ static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_AVGREF_ABS_SUB_32
@@ -540,7 +540,7 @@ static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_REF_ABS_SUB_16
@@ -588,7 +588,7 @@ static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
+    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_AVGREF_ABS_SUB_16
@@ -633,7 +633,7 @@ static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_REF_ABS_SUB_8
@@ -681,7 +681,7 @@ static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_AVGREF_ABS_SUB_8
@@ -725,7 +725,7 @@ static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_REF_ABS_SUB_4
@@ -769,7 +769,7 @@ static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
   mips_reg l_counter = counter;
 
   __asm__ volatile (
-    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
+    "pxor       %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
     "1:                                                         \n\t"
     // Include two loop body, to reduce loop time.
     SAD_SRC_AVGREF_ABS_SUB_4
diff --git a/vpx_dsp/mips/subtract_mmi.c b/vpx_dsp/mips/subtract_mmi.c
index 9f361704a..8bd7e6977 100644
--- a/vpx_dsp/mips/subtract_mmi.c
+++ b/vpx_dsp/mips/subtract_mmi.c
@@ -24,7 +24,7 @@ void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
     switch (rows) {
       case 4:
         __asm__ volatile(
-            "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
+            "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
 #if _MIPS_SIM == _ABIO32
             "ulw        %[tmp0],    0x00(%[src])                        \n\t"
             "mtc1       %[tmp0],    %[ftmp1]                            \n\t"
@@ -118,7 +118,7 @@ void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
         break;
       case 8:
         __asm__ volatile(
-            "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
+            "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
             "li         %[tmp0],    0x02                                \n\t"
             "1:                                                         \n\t"
             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
@@ -206,7 +206,7 @@ void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
         break;
       case 16:
         __asm__ volatile(
-            "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
+            "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
             "li         %[tmp0],    0x08                                \n\t"
             "1:                                                         \n\t"
             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
diff --git a/vpx_dsp/mips/variance_mmi.c b/vpx_dsp/mips/variance_mmi.c
index c1780c33a..29e52a1a8 100644
--- a/vpx_dsp/mips/variance_mmi.c
+++ b/vpx_dsp/mips/variance_mmi.c
@@ -150,7 +150,7 @@ static const uint8_t bilinear_filters[8][2] = {
   "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp6]            \n\t" \
                                                                     \
   /* store: temp2[0] ~ temp2[3] */                                  \
-  "and        %[ftmp2],   %[ftmp2],       %[mask]             \n\t" \
+  "pand       %[ftmp2],   %[ftmp2],       %[mask]             \n\t" \
   "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t" \
   "gssdrc1    %[ftmp2],   0x00(%[temp2_ptr])                  \n\t"
 
@@ -163,7 +163,7 @@ static const uint8_t bilinear_filters[8][2] = {
   "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp6]            \n\t" \
                                                                     \
   /* store: temp2[0] ~ temp2[3] */                                  \
-  "and        %[ftmp4],   %[ftmp4],       %[mask]             \n\t" \
+  "pand       %[ftmp4],   %[ftmp4],       %[mask]             \n\t" \
   "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t" \
   "gssdrc1    %[ftmp4],   0x00(%[temp2_ptr])                  \n\t"
 
@@ -225,8 +225,8 @@ static const uint8_t bilinear_filters[8][2] = {
   "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp14]           \n\t" \
                                                                     \
   /* store: temp2[0] ~ temp2[7] */                                  \
-  "and        %[ftmp2],   %[ftmp2],       %[mask]             \n\t" \
-  "and        %[ftmp3],   %[ftmp3],       %[mask]             \n\t" \
+  "pand       %[ftmp2],   %[ftmp2],       %[mask]             \n\t" \
+  "pand       %[ftmp3],   %[ftmp3],       %[mask]             \n\t" \
   "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]            \n\t" \
   "gssdlc1    %[ftmp2],   0x07(%[temp2_ptr])                  \n\t" \
   "gssdrc1    %[ftmp2],   0x00(%[temp2_ptr])                  \n\t"
@@ -247,8 +247,8 @@ static const uint8_t bilinear_filters[8][2] = {
   "psrlh      %[ftmp9],   %[ftmp9],       %[ftmp14]           \n\t" \
                                                                     \
   /* store: temp2[0] ~ temp2[7] */                                  \
-  "and        %[ftmp8],   %[ftmp8],       %[mask]             \n\t" \
-  "and        %[ftmp9],   %[ftmp9],       %[mask]             \n\t" \
+  "pand       %[ftmp8],   %[ftmp8],       %[mask]             \n\t" \
+  "pand       %[ftmp9],   %[ftmp9],       %[mask]             \n\t" \
   "packushb   %[ftmp8],   %[ftmp8],       %[ftmp9]            \n\t" \
   "gssdlc1    %[ftmp8],   0x07(%[temp2_ptr])                  \n\t" \
   "gssdrc1    %[ftmp8],   0x00(%[temp2_ptr])                  \n\t"
@@ -319,8 +319,8 @@ static const uint8_t bilinear_filters[8][2] = {
   "psrlh      %[ftmp5],   %[ftmp5],       %[ftmp14]           \n\t" \
                                                                     \
   /* store: temp2[8] ~ temp2[15] */                                 \
-  "and        %[ftmp4],   %[ftmp4],       %[mask]             \n\t" \
-  "and        %[ftmp5],   %[ftmp5],       %[mask]             \n\t" \
+  "pand       %[ftmp4],   %[ftmp4],       %[mask]             \n\t" \
+  "pand       %[ftmp5],   %[ftmp5],       %[mask]             \n\t" \
   "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t" \
   "gssdlc1    %[ftmp4],   0x0f(%[temp2_ptr])                  \n\t" \
   "gssdrc1    %[ftmp4],   0x08(%[temp2_ptr])                  \n\t"
@@ -343,8 +343,8 @@ static const uint8_t bilinear_filters[8][2] = {
   "psrlh      %[ftmp11],  %[ftmp11],      %[ftmp14]           \n\t" \
                                                                     \
   /* store: temp2[8] ~ temp2[15] */                                 \
-  "and        %[ftmp10],  %[ftmp10],      %[mask]             \n\t" \
-  "and        %[ftmp11],  %[ftmp11],      %[mask]             \n\t" \
+  "pand       %[ftmp10],  %[ftmp10],      %[mask]             \n\t" \
+  "pand       %[ftmp11],  %[ftmp11],      %[mask]             \n\t" \
   "packushb   %[ftmp10],  %[ftmp10],      %[ftmp11]           \n\t" \
   "gssdlc1    %[ftmp10],  0x0f(%[temp2_ptr])                  \n\t" \
   "gssdrc1    %[ftmp10],  0x08(%[temp2_ptr])                  \n\t"
@@ -418,9 +418,9 @@ static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
     "li         %[tmp0],    0x20                                \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
     MMI_L(%[tmp0], %[high], 0x00)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-    "xor        %[ftmp9],   %[ftmp9],       %[ftmp9]            \n\t"
-    "xor        %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp9],   %[ftmp9],       %[ftmp9]            \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
     "1:                                                         \n\t"
     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
@@ -478,7 +478,7 @@ static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
     "mfc1       %[tmp1],    %[ftmp9]                            \n\t"
     "mfhc1      %[tmp2],    %[ftmp9]                            \n\t"
     "addu       %[sum],     %[tmp1],        %[tmp2]             \n\t"
-    "dsrl       %[ftmp1],   %[ftmp10],      %[ftmp11]           \n\t"
+    "ssrld      %[ftmp1],   %[ftmp10],      %[ftmp11]           \n\t"
     "paddw      %[ftmp1],   %[ftmp1],       %[ftmp10]           \n\t"
     "swc1       %[ftmp1],   0x00(%[sse])                        \n\t"
     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
@@ -523,9 +523,9 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
     "li         %[tmp0],    0x20                                \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
     "li         %[tmp0],    0x40                                \n\t"
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-    "xor        %[ftmp9],   %[ftmp9],       %[ftmp9]            \n\t"
-    "xor        %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp9],   %[ftmp9],       %[ftmp9]            \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
     "1:                                                         \n\t"
     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
@@ -559,7 +559,7 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
     "mfc1       %[tmp1],    %[ftmp9]                            \n\t"
     "mfhc1      %[tmp2],    %[ftmp9]                            \n\t"
     "addu       %[sum],     %[tmp1],        %[tmp2]             \n\t"
-    "dsrl       %[ftmp1],   %[ftmp10],      %[ftmp11]           \n\t"
+    "ssrld      %[ftmp1],   %[ftmp10],      %[ftmp11]           \n\t"
     "paddw      %[ftmp1],   %[ftmp1],       %[ftmp10]           \n\t"
     "swc1       %[ftmp1],   0x00(%[sse])                        \n\t"
     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
@@ -594,10 +594,10 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
     "li         %[tmp0],    0x20                                \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
     MMI_L(%[tmp0], %[high], 0x00)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-    "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
-    "xor        %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
-    "xor        %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
+    "pxor       %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
     "1:                                                         \n\t"
     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
@@ -625,7 +625,7 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
     "bnez       %[tmp0],    1b                                  \n\t"
 
-    "dsrl       %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
+    "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
 
@@ -636,7 +636,7 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
-    "dsrl       %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
+    "ssrld      %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
     "paddw      %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
     "swc1       %[ftmp0],   0x00(%[sum])                        \n\t"
 
@@ -680,10 +680,10 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
     "li         %[tmp0],    0x20                                \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
     MMI_L(%[tmp0], %[high], 0x00)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-    "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
-    "xor        %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
-    "xor        %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
+    "pxor       %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
     "1:                                                         \n\t"
     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
@@ -701,7 +701,7 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
     "bnez       %[tmp0],    1b                                  \n\t"
 
-    "dsrl       %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
+    "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
 
@@ -712,7 +712,7 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
-    "dsrl       %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
+    "ssrld      %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
     "paddw      %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
     "swc1       %[ftmp0],   0x00(%[sum])                        \n\t"
 
@@ -757,10 +757,10 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
     "li         %[tmp0],    0x20                                \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
     MMI_L(%[tmp0], %[high], 0x00)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-    "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
-    "xor        %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
-    "xor        %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
+    "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
+    "pxor       %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
     "1:                                                         \n\t"
     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
@@ -773,7 +773,7 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
     "bnez       %[tmp0],    1b                                  \n\t"
 
-    "dsrl       %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
+    "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
 
@@ -784,7 +784,7 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
-    "dsrl       %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
+    "ssrld      %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
     "paddw      %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
     "swc1       %[ftmp0],   0x00(%[sum])                        \n\t"
 
@@ -829,10 +829,10 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
     "li         %[tmp0],    0x20                                \n\t"
     "mtc1       %[tmp0],    %[ftmp10]                           \n\t"
     MMI_L(%[tmp0], %[high], 0x00)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-    "xor        %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
-    "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]            \n\t"
-    "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
+    "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]            \n\t"
+    "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
     "1:                                                         \n\t"
     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
@@ -845,7 +845,7 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
     "bnez       %[tmp0],    1b                                  \n\t"
 
-    "dsrl       %[ftmp9],   %[ftmp6],       %[ftmp10]           \n\t"
+    "ssrld      %[ftmp9],   %[ftmp6],       %[ftmp10]           \n\t"
     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp6]            \n\t"
     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
 
@@ -856,7 +856,7 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
-    "dsrl       %[ftmp0],   %[ftmp3],       %[ftmp10]           \n\t"
+    "ssrld      %[ftmp0],   %[ftmp3],       %[ftmp10]           \n\t"
     "paddw      %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
     "swc1       %[ftmp0],   0x00(%[sum])                        \n\t"
     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
@@ -898,8 +898,8 @@ static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
     "li         %[tmp0],    0x20                                \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
     MMI_L(%[tmp0], %[high], 0x00)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-    "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
 
     "1:                                                         \n\t"
     VARIANCE_SSE_16
@@ -909,7 +909,7 @@ static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
     "bnez       %[tmp0],    1b                                  \n\t"
 
-    "dsrl       %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
+    "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
@@ -951,8 +951,8 @@ static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
     "li         %[tmp0],    0x20                                \n\t"
     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
     MMI_L(%[tmp0], %[high], 0x00)
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-    "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
 
     "1:                                                         \n\t"
     VARIANCE_SSE_8
@@ -962,7 +962,7 @@ static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
     "bnez       %[tmp0],    1b                                  \n\t"
 
-    "dsrl       %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
+    "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
@@ -1029,7 +1029,7 @@ static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr,
   const uint8_t *filter_y = bilinear_filters[y_offset];
 
   __asm__ volatile (
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
     MMI_LI(%[tmp0], 0x07)
     MMI_MTC1(%[tmp0], %[ftmp14])
     "pshufh     %[filter_x0], %[filter_x0], %[ftmp0]            \n\t"
@@ -1111,7 +1111,7 @@ static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr,
   const uint8_t *filter_y = bilinear_filters[y_offset];
 
   __asm__ volatile (
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
     MMI_LI(%[tmp0], 0x07)
     MMI_MTC1(%[tmp0], %[ftmp14])
     "pshufh     %[filter_x0], %[filter_x0], %[ftmp0]            \n\t"
@@ -1194,7 +1194,7 @@ static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr,
   const uint8_t *filter_y = bilinear_filters[y_offset];
 
   __asm__ volatile (
-    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
     MMI_LI(%[tmp0], 0x07)
     MMI_MTC1(%[tmp0], %[ftmp6])
     "pshufh     %[filter_x0], %[filter_x0], %[ftmp0]            \n\t"
diff --git a/vpx_dsp/mips/vpx_convolve8_mmi.c b/vpx_dsp/mips/vpx_convolve8_mmi.c
index ba9ceb866..cb7bca558 100644
--- a/vpx_dsp/mips/vpx_convolve8_mmi.c
+++ b/vpx_dsp/mips/vpx_convolve8_mmi.c
@@ -105,7 +105,7 @@ static void convolve_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride,
   /* clang-format off */
   __asm__ volatile(
     "move       %[tmp1],    %[width]                   \n\t"
-    "xor        %[ftmp0],   %[ftmp0],    %[ftmp0]      \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],    %[ftmp0]      \n\t"
     "gsldlc1    %[filter1], 0x03(%[filter])            \n\t"
     "gsldrc1    %[filter1], 0x00(%[filter])            \n\t"
     "gsldlc1    %[filter2], 0x0b(%[filter])            \n\t"
@@ -178,7 +178,7 @@ static void convolve_vert_mmi(const uint8_t *src, ptrdiff_t src_stride,
   (void)y_step_q4;
 
   __asm__ volatile(
-    "xor        %[ftmp0],    %[ftmp0],   %[ftmp0]      \n\t"
+    "pxor       %[ftmp0],    %[ftmp0],   %[ftmp0]      \n\t"
     "gsldlc1    %[ftmp4],    0x03(%[filter])           \n\t"
     "gsldrc1    %[ftmp4],    0x00(%[filter])           \n\t"
     "gsldlc1    %[ftmp5],    0x0b(%[filter])           \n\t"
@@ -271,7 +271,7 @@ static void convolve_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride,
 
   __asm__ volatile(
     "move       %[tmp1],    %[width]                   \n\t"
-    "xor        %[ftmp0],   %[ftmp0],    %[ftmp0]      \n\t"
+    "pxor       %[ftmp0],   %[ftmp0],    %[ftmp0]      \n\t"
     "gsldlc1    %[filter1], 0x03(%[filter])            \n\t"
     "gsldrc1    %[filter1], 0x00(%[filter])            \n\t"
     "gsldlc1    %[filter2], 0x0b(%[filter])            \n\t"
@@ -354,7 +354,7 @@ static void convolve_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride,
   (void)y_step_q4;
 
   __asm__ volatile(
-    "xor        %[ftmp0],    %[ftmp0],   %[ftmp0]      \n\t"
+    "pxor       %[ftmp0],    %[ftmp0],   %[ftmp0]      \n\t"
     "gsldlc1    %[ftmp4],    0x03(%[filter])           \n\t"
     "gsldrc1    %[ftmp4],    0x00(%[filter])           \n\t"
     "gsldlc1    %[ftmp5],    0x0b(%[filter])           \n\t"
@@ -467,7 +467,7 @@ void vpx_convolve_avg_mmi(const uint8_t *src, ptrdiff_t src_stride,
 
     __asm__ volatile(
       "move       %[tmp1],    %[width]                  \n\t"
-      "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]      \n\t"
+      "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]      \n\t"
       "li         %[tmp0],    0x10001                   \n\t"
       MMI_MTC1(%[tmp0],    %[ftmp3])
       "punpcklhw  %[ftmp3],   %[ftmp3],   %[ftmp3]      \n\t"
diff --git a/vpx_ports/asmdefs_mmi.h b/vpx_ports/asmdefs_mmi.h
index 28355bf9f..400a51cc3 100644
--- a/vpx_ports/asmdefs_mmi.h
+++ b/vpx_ports/asmdefs_mmi.h
@@ -34,7 +34,7 @@
   "ld          " #reg ",        " #bias "(" #addr ")               \n\t"
 
 #define MMI_SRL(reg1, reg2, shift) \
-  "dsrl        " #reg1 ",       " #reg2 ",       " #shift "        \n\t"
+  "ssrld       " #reg1 ",       " #reg2 ",       " #shift "        \n\t"
 
 #define MMI_SLL(reg1, reg2, shift) \
   "dsll        " #reg1 ",       " #reg2 ",       " #shift "        \n\t"
@@ -63,7 +63,7 @@
   "lw          " #reg ",        " #bias "(" #addr ")               \n\t"
 
 #define MMI_SRL(reg1, reg2, shift) \
-  "srl         " #reg1 ",       " #reg2 ",       " #shift "        \n\t"
+  "ssrlw       " #reg1 ",       " #reg2 ",       " #shift "        \n\t"
 
 #define MMI_SLL(reg1, reg2, shift) \
   "sll         " #reg1 ",       " #reg2 ",       " #shift "        \n\t"