col = num_cols * 3;
asm(".set noreorder\r\n"
- "li $8, 1\r\n"
- "move $9, %3\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 1f\r\n"
- "nop \r\n"
- "subu $9, $9, 1\r\n"
- "xor $12, $12, $12\r\n"
- "move $13, %5\r\n"
- "dadd $13, $13, $9\r\n"
- "lbu $12, 0($13)\r\n"
-
- "1: \r\n"
- "li $8, 2\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 2f\r\n"
- "nop \r\n"
- "subu $9, $9, 2\r\n"
- "xor $11, $11, $11\r\n"
- "move $13, %5\r\n"
- "dadd $13, $13, $9\r\n"
- "lhu $11, 0($13)\r\n"
- "sll $12, $12, 16\r\n"
- "or $12, $12, $11\r\n"
-
- "2: \r\n"
- "dmtc1 $12, %0\r\n"
- "li $8, 4\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 3f\r\n"
- "nop \r\n"
- "subu $9, $9, 4\r\n"
- "move $13, %5\r\n"
- "dadd $13, $13, $9\r\n"
- "lwu $14, 0($13)\r\n"
- "dmtc1 $14, %1\r\n"
- "dsll32 $12, $12, 0\r\n"
- "or $12, $12, $14\r\n"
- "dmtc1 $12, %0\r\n"
-
- "3: \r\n"
- "li $8, 8\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 4f\r\n"
- "nop \r\n"
- "mov.s %1, %0\r\n"
- "ldc1 %0, 0(%5)\r\n"
- "li $9, 8\r\n"
- "j 5f\r\n"
- "nop \r\n"
-
- "4: \r\n"
- "li $8, 16\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 5f\r\n"
- "nop \r\n"
- "mov.s %2, %0\r\n"
- "ldc1 %0, 0(%5)\r\n"
- "ldc1 %1, 8(%5)\r\n"
-
- "5: \r\n"
- "nop \r\n"
+ "li $8, 1\r\n"
+ "move $9, %3\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 1f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 1\r\n"
+ "xor $12, $12, $12\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $9\r\n"
+ "lbu $12, 0($13)\r\n"
+
+ "1: \r\n"
+ "li $8, 2\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 2f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 2\r\n"
+ "xor $11, $11, $11\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $9\r\n"
+ "lhu $11, 0($13)\r\n"
+ "sll $12, $12, 16\r\n"
+ "or $12, $12, $11\r\n"
+
+ "2: \r\n"
+ "dmtc1 $12, %0\r\n"
+ "li $8, 4\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 3f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 4\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $9\r\n"
+ "lwu $14, 0($13)\r\n"
+ "dmtc1 $14, %1\r\n"
+ "dsll32 $12, $12, 0\r\n"
+ "or $12, $12, $14\r\n"
+ "dmtc1 $12, %0\r\n"
+
+ "3: \r\n"
+ "li $8, 8\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 4f\r\n"
+ "nop \r\n"
+ "mov.s %1, %0\r\n"
+ "ldc1 %0, 0(%5)\r\n"
+ "li $9, 8\r\n"
+ "j 5f\r\n"
+ "nop \r\n"
+
+ "4: \r\n"
+ "li $8, 16\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 5f\r\n"
+ "nop \r\n"
+ "mov.s %2, %0\r\n"
+ "ldc1 %0, 0(%5)\r\n"
+ "ldc1 %1, 8(%5)\r\n"
+
+ "5: \r\n"
+ "nop \r\n"
".set reorder\r\n"
: "=f" (mmA), "=f" (mmG), "=f" (mmF)
col = num_cols;
asm(".set noreorder\r\n"
- "li $8, 1\r\n"
- "move $9, %4\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 1f\r\n"
- "nop \r\n"
- "subu $9, $9, 1\r\n"
- "dsll $11, $9, 2\r\n"
- "move $13, %5\r\n"
- "daddu $13, $13, $11\r\n"
- "lwc1 %0, 0($13)\r\n"
-
- "1: \r\n"
- "li $8, 2\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 2f\r\n"
- "nop \r\n"
- "subu $9, $9, 2\r\n"
- "dsll $11, $9, 2\r\n"
- "move $13, %5\r\n"
- "daddu $13, $13, $11\r\n"
- "mov.s %1, %0\r\n"
- "ldc1 %0, 0($13)\r\n"
-
- "2: \r\n"
- "li $8, 4\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 3f\r\n"
- "nop \r\n"
- "mov.s %2, %0\r\n"
- "mov.s %3, %1\r\n"
- "ldc1 %0, 0(%5)\r\n"
- "ldc1 %1, 8(%5)\r\n"
-
- "3: \r\n"
- "nop \r\n"
+ "li $8, 1\r\n"
+ "move $9, %4\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 1f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 1\r\n"
+ PTR_SLL "$11, $9, 2\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $11\r\n"
+ "lwc1 %0, 0($13)\r\n"
+
+ "1: \r\n"
+ "li $8, 2\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 2f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 2\r\n"
+ PTR_SLL "$11, $9, 2\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $11\r\n"
+ "mov.s %1, %0\r\n"
+ "ldc1 %0, 0($13)\r\n"
+
+ "2: \r\n"
+ "li $8, 4\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 3f\r\n"
+ "nop \r\n"
+ "mov.s %2, %0\r\n"
+ "mov.s %3, %1\r\n"
+ "ldc1 %0, 0(%5)\r\n"
+ "ldc1 %1, 8(%5)\r\n"
+
+ "3: \r\n"
+ "nop \r\n"
".set reorder\r\n"
: "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC)
col = num_cols * 3;
asm(".set noreorder\r\n"
- "li $8, 1\r\n"
- "move $9, %3\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 1f\r\n"
- "nop \r\n"
- "subu $9, $9, 1\r\n"
- "xor $12, $12, $12\r\n"
- "move $13, %5\r\n"
- "dadd $13, $13, $9\r\n"
- "lbu $12, 0($13)\r\n"
-
- "1: \r\n"
- "li $8, 2\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 2f\r\n"
- "nop \r\n"
- "subu $9, $9, 2\r\n"
- "xor $11, $11, $11\r\n"
- "move $13, %5\r\n"
- "dadd $13, $13, $9\r\n"
- "lhu $11, 0($13)\r\n"
- "sll $12, $12, 16\r\n"
- "or $12, $12, $11\r\n"
-
- "2: \r\n"
- "dmtc1 $12, %0\r\n"
- "li $8, 4\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 3f\r\n"
- "nop \r\n"
- "subu $9, $9, 4\r\n"
- "move $13, %5\r\n"
- "dadd $13, $13, $9\r\n"
- "lwu $14, 0($13)\r\n"
- "dmtc1 $14, %1\r\n"
- "dsll32 $12, $12, 0\r\n"
- "or $12, $12, $14\r\n"
- "dmtc1 $12, %0\r\n"
-
- "3: \r\n"
- "li $8, 8\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 4f\r\n"
- "nop \r\n"
- "mov.s %1, %0\r\n"
- "ldc1 %0, 0(%5)\r\n"
- "li $9, 8\r\n"
- "j 5f\r\n"
- "nop \r\n"
-
- "4: \r\n"
- "li $8, 16\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 5f\r\n"
- "nop \r\n"
- "mov.s %2, %0\r\n"
- "ldc1 %0, 0(%5)\r\n"
- "ldc1 %1, 8(%5)\r\n"
-
- "5: \r\n"
- "nop \r\n"
+ "li $8, 1\r\n"
+ "move $9, %3\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 1f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 1\r\n"
+ "xor $12, $12, $12\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $9\r\n"
+ "lbu $12, 0($13)\r\n"
+
+ "1: \r\n"
+ "li $8, 2\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 2f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 2\r\n"
+ "xor $11, $11, $11\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $9\r\n"
+ "lhu $11, 0($13)\r\n"
+ "sll $12, $12, 16\r\n"
+ "or $12, $12, $11\r\n"
+
+ "2: \r\n"
+ "dmtc1 $12, %0\r\n"
+ "li $8, 4\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 3f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 4\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $9\r\n"
+ "lwu $14, 0($13)\r\n"
+ "dmtc1 $14, %1\r\n"
+ "dsll32 $12, $12, 0\r\n"
+ "or $12, $12, $14\r\n"
+ "dmtc1 $12, %0\r\n"
+
+ "3: \r\n"
+ "li $8, 8\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 4f\r\n"
+ "nop \r\n"
+ "mov.s %1, %0\r\n"
+ "ldc1 %0, 0(%5)\r\n"
+ "li $9, 8\r\n"
+ "j 5f\r\n"
+ "nop \r\n"
+
+ "4: \r\n"
+ "li $8, 16\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 5f\r\n"
+ "nop \r\n"
+ "mov.s %2, %0\r\n"
+ "ldc1 %0, 0(%5)\r\n"
+ "ldc1 %1, 8(%5)\r\n"
+
+ "5: \r\n"
+ "nop \r\n"
".set reorder\r\n"
: "=f" (mmA), "=f" (mmG), "=f" (mmF)
col = num_cols;
asm(".set noreorder\r\n"
- "li $8, 1\r\n"
- "move $9, %4\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 1f\r\n"
- "nop \r\n"
- "subu $9, $9, 1\r\n"
- "dsll $11, $9, 2\r\n"
- "move $13, %5\r\n"
- "daddu $13, $13, $11\r\n"
- "lwc1 %0, 0($13)\r\n"
-
- "1: \r\n"
- "li $8, 2\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 2f\r\n"
- "nop \r\n"
- "subu $9, $9, 2\r\n"
- "dsll $11, $9, 2\r\n"
- "move $13, %5\r\n"
- "daddu $13, $13, $11\r\n"
- "mov.s %1, %0\r\n"
- "ldc1 %0, 0($13)\r\n"
-
- "2: \r\n"
- "li $8, 4\r\n"
- "and $10, $9, $8\r\n"
- "beqz $10, 3f\r\n"
- "nop \r\n"
- "mov.s %2, %0\r\n"
- "mov.s %3, %1\r\n"
- "ldc1 %0, 0(%5)\r\n"
- "ldc1 %1, 8(%5)\r\n"
-
- "3: \r\n"
- "nop \r\n"
+ "li $8, 1\r\n"
+ "move $9, %4\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 1f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 1\r\n"
+ PTR_SLL "$11, $9, 2\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $11\r\n"
+ "lwc1 %0, 0($13)\r\n"
+
+ "1: \r\n"
+ "li $8, 2\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 2f\r\n"
+ "nop \r\n"
+ "subu $9, $9, 2\r\n"
+ PTR_SLL "$11, $9, 2\r\n"
+ "move $13, %5\r\n"
+ PTR_ADDU "$13, $13, $11\r\n"
+ "mov.s %1, %0\r\n"
+ "ldc1 %0, 0($13)\r\n"
+
+ "2: \r\n"
+ "li $8, 4\r\n"
+ "and $10, $9, $8\r\n"
+ "beqz $10, 3f\r\n"
+ "nop \r\n"
+ "mov.s %2, %0\r\n"
+ "mov.s %3, %1\r\n"
+ "ldc1 %0, 0(%5)\r\n"
+ "ldc1 %1, 8(%5)\r\n"
+
+ "3: \r\n"
+ "nop \r\n"
".set reorder\r\n"
: "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC)
col = num_cols * 3;
asm(".set noreorder\r\n"
- "li $8, 16\r\n"
- "move $9, %4\r\n"
- "mov.s $f4, %1\r\n"
- "mov.s $f6, %3\r\n"
- "move $10, %5\r\n"
- "bltu $9, $8, 1f\r\n"
- "nop \r\n"
- "gssdlc1 $f4, 7($10)\r\n"
- "gssdrc1 $f4, 0($10)\r\n"
- "gssdlc1 $f6, 7+8($10)\r\n"
- "gssdrc1 $f6, 8($10)\r\n"
- "mov.s $f4, %2\r\n"
- "subu $9, $9, 16\r\n"
- "daddu $10, $10, 16\r\n"
- "b 2f\r\n"
- "nop \r\n"
-
- "1: \r\n"
- "li $8, 8\r\n" /* st8 */
- "bltu $9, $8, 2f\r\n"
- "nop \r\n"
- "gssdlc1 $f4, 7($10)\r\n"
- "gssdrc1 $f4, ($10)\r\n"
- "mov.s $f4, %3\r\n"
- "subu $9, $9, 8\r\n"
- "daddu $10, $10, 8\r\n"
-
- "2: \r\n"
- "li $8, 4\r\n" /* st4 */
- "mfc1 $11, $f4\r\n"
- "bltu $9, $8, 3f\r\n"
- "nop \r\n"
- "swl $11, 3($10)\r\n"
- "swr $11, 0($10)\r\n"
- "li $8, 32\r\n"
- "mtc1 $8, $f6\r\n"
- "dsrl $f4, $f4, $f6\r\n"
- "mfc1 $11, $f4\r\n"
- "subu $9, $9, 4\r\n"
- "daddu $10, $10, 4\r\n"
-
- "3: \r\n"
- "li $8, 2\r\n" /* st2 */
- "bltu $9, $8, 4f\r\n"
- "nop \r\n"
- "ush $11, 0($10)\r\n"
- "srl $11, 16\r\n"
- "subu $9, $9, 2\r\n"
- "daddu $10, $10, 2\r\n"
-
- "4: \r\n"
- "li $8, 1\r\n" /* st1 */
- "bltu $9, $8, 5f\r\n"
- "nop \r\n"
- "sb $11, 0($10)\r\n"
-
- "5: \r\n"
- "nop \r\n" /* end */
+ "li $8, 16\r\n"
+ "move $9, %4\r\n"
+ "mov.s $f4, %1\r\n"
+ "mov.s $f6, %3\r\n"
+ "move $10, %5\r\n"
+ "bltu $9, $8, 1f\r\n"
+ "nop \r\n"
+ "gssdlc1 $f4, 7($10)\r\n"
+ "gssdrc1 $f4, 0($10)\r\n"
+ "gssdlc1 $f6, 7+8($10)\r\n"
+ "gssdrc1 $f6, 8($10)\r\n"
+ "mov.s $f4, %2\r\n"
+ "subu $9, $9, 16\r\n"
+ PTR_ADDU "$10, $10, 16\r\n"
+ "b 2f\r\n"
+ "nop \r\n"
+
+ "1: \r\n"
+ "li $8, 8\r\n" /* st8 */
+ "bltu $9, $8, 2f\r\n"
+ "nop \r\n"
+ "gssdlc1 $f4, 7($10)\r\n"
+ "gssdrc1 $f4, 0($10)\r\n"
+ "mov.s $f4, %3\r\n"
+ "subu $9, $9, 8\r\n"
+ PTR_ADDU "$10, $10, 8\r\n"
+
+ "2: \r\n"
+ "li $8, 4\r\n" /* st4 */
+ "mfc1 $11, $f4\r\n"
+ "bltu $9, $8, 3f\r\n"
+ "nop \r\n"
+ "swl $11, 3($10)\r\n"
+ "swr $11, 0($10)\r\n"
+ "li $8, 32\r\n"
+ "mtc1 $8, $f6\r\n"
+ "dsrl $f4, $f4, $f6\r\n"
+ "mfc1 $11, $f4\r\n"
+ "subu $9, $9, 4\r\n"
+ PTR_ADDU "$10, $10, 4\r\n"
+
+ "3: \r\n"
+ "li $8, 2\r\n" /* st2 */
+ "bltu $9, $8, 4f\r\n"
+ "nop \r\n"
+ "ush $11, 0($10)\r\n"
+ "srl $11, 16\r\n"
+ "subu $9, $9, 2\r\n"
+ PTR_ADDU "$10, $10, 2\r\n"
+
+ "4: \r\n"
+ "li $8, 1\r\n" /* st1 */
+ "bltu $9, $8, 5f\r\n"
+ "nop \r\n"
+ "sb $11, 0($10)\r\n"
+
+ "5: \r\n"
+ "nop \r\n" /* end */
: "=m" (*outptr)
: "f" (mmA), "f" (mmC), "f" (mmE), "r" (col), "r" (outptr)
: "$f4", "$f6", "$8", "$9", "$10", "$11", "memory"
col = num_cols;
asm(".set noreorder\r\n" /* st16 */
- "li $8, 4\r\n"
- "move $9, %6\r\n"
- "move $10, %7\r\n"
- "mov.s $f4, %2\r\n"
- "mov.s $f6, %4\r\n"
- "bltu $9, $8, 1f\r\n"
- "nop \r\n"
- "gssdlc1 $f4, 7($10)\r\n"
- "gssdrc1 $f4, ($10)\r\n"
- "gssdlc1 $f6, 7+8($10)\r\n"
- "gssdrc1 $f6, 8($10)\r\n"
- "mov.s $f4, %3\r\n"
- "mov.s $f6, %5\r\n"
- "subu $9, $9, 4\r\n"
- "daddu $10, $10, 16\r\n"
-
- "1: \r\n"
- "li $8, 2\r\n" /* st8 */
- "bltu $9, $8, 2f\r\n"
- "nop \r\n"
- "gssdlc1 $f4, 7($10)\r\n"
- "gssdrc1 $f4, 0($10)\r\n"
- "mov.s $f4, $f6\r\n"
- "subu $9, $9, 2\r\n"
- "daddu $10, $10, 8\r\n"
-
- "2: \r\n"
- "li $8, 1\r\n" /* st4 */
- "bltu $9, $8, 3f\r\n"
- "nop \r\n"
- "gsswlc1 $f4, 3($10)\r\n"
- "gsswrc1 $f4, 0($10)\r\n"
-
- "3: \r\n"
- "li %1, 0\r\n" /* end */
+ "li $8, 4\r\n"
+ "move $9, %6\r\n"
+ "move $10, %7\r\n"
+ "mov.s $f4, %2\r\n"
+ "mov.s $f6, %4\r\n"
+ "bltu $9, $8, 1f\r\n"
+ "nop \r\n"
+ "gssdlc1 $f4, 7($10)\r\n"
+ "gssdrc1 $f4, 0($10)\r\n"
+ "gssdlc1 $f6, 7+8($10)\r\n"
+ "gssdrc1 $f6, 8($10)\r\n"
+ "mov.s $f4, %3\r\n"
+ "mov.s $f6, %5\r\n"
+ "subu $9, $9, 4\r\n"
+ PTR_ADDU "$10, $10, 16\r\n"
+
+ "1: \r\n"
+ "li $8, 2\r\n" /* st8 */
+ "bltu $9, $8, 2f\r\n"
+ "nop \r\n"
+ "gssdlc1 $f4, 7($10)\r\n"
+ "gssdrc1 $f4, 0($10)\r\n"
+ "mov.s $f4, $f6\r\n"
+ "subu $9, $9, 2\r\n"
+ PTR_ADDU "$10, $10, 8\r\n"
+
+ "2: \r\n"
+ "li $8, 1\r\n" /* st4 */
+ "bltu $9, $8, 3f\r\n"
+ "nop \r\n"
+ "gsswlc1 $f4, 3($10)\r\n"
+ "gsswrc1 $f4, 0($10)\r\n"
+
+ "3: \r\n"
+ "li %1, 0\r\n" /* end */
: "=m" (*outptr), "=r" (col)
: "f" (mmA), "f" (mmC), "f" (mmD), "f" (mmH), "r" (col),
"r" (outptr)
"mov.s $f6, %5\r\n"
"mov.s $f8, %6\r\n"
"subu $9, $9, 24\r\n"
- "daddu $10, $10, 24\r\n"
+ PTR_ADDU "$10, $10, 24\r\n"
"1: \r\n"
"li $8, 16\r\n" /* st16 */
"gssdrc1 $f6, 8($10)\r\n"
"mov.s $f4, $f8\r\n"
"subu $9, $9, 16\r\n"
- "daddu $10, $10, 16\r\n"
+ PTR_ADDU "$10, $10, 16\r\n"
"2: \r\n"
"li $8, 8\r\n" /* st8 */
"gssdrc1 $f4, 0($10)\r\n"
"mov.s $f4, $f6\r\n"
"subu $9, $9, 8\r\n"
- "daddu $10, $10, 8\r\n"
+ PTR_ADDU "$10, $10, 8\r\n"
"3: \r\n"
"li $8, 4\r\n" /* st4 */
"dsrl $f4, $f4, $f6\r\n"
"mfc1 $11, $f4\r\n"
"subu $9, $9, 4\r\n"
- "daddu $10, $10, 4\r\n"
+ PTR_ADDU "$10, $10, 4\r\n"
"4: \r\n"
"li $8, 2\r\n" /* st2 */
"ush $11, 0($10)\r\n"
"srl $11, 16\r\n"
"subu $9, $9, 2\r\n"
- "daddu $10, $10, 2\r\n"
+ PTR_ADDU "$10, $10, 2\r\n"
"5: \r\n"
"li $8, 1\r\n" /* st1 */
"bltu $9, $8, 1f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
- "gssdrc1 $f4, ($10)\r\n"
+ "gssdrc1 $f4, 0($10)\r\n"
"gssdlc1 $f6, 7+8($10)\r\n"
"gssdrc1 $f6, 8($10)\r\n"
"gssdlc1 $f8, 7+16($10)\r\n"
"mov.s $f8, %8\r\n"
"mov.s $f10, %9\r\n"
"subu $9, $9, 8\r\n"
- "daddu $10, $10, 32\r\n"
+ PTR_ADDU "$10, $10, 32\r\n"
"1: \r\n"
"li $8, 4\r\n" /* st16 */
"mov.s $f4, $f8\r\n"
"mov.s $f6, $f10\r\n"
"subu $9, $9, 4\r\n"
- "daddu $10, $10, 16\r\n"
+ PTR_ADDU "$10, $10, 16\r\n"
"2: \r\n"
"li $8, 2\r\n" /* st8 */
"gssdrc1 $f4, 0($10)\r\n"
"mov.s $f4, $f6\r\n"
"subu $9, $9, 2\r\n"
- "daddu $10, $10, 8\r\n"
+ PTR_ADDU "$10, $10, 8\r\n"
"3: \r\n"
"li $8, 1\r\n" /* st4 */
if (downsampled_width & 7) {
tmp = (downsampled_width - 1) * sizeof(JSAMPLE);
tmp1 = downsampled_width * sizeof(JSAMPLE);
- asm("daddu $8, %3, %6\r\n"
- "lb $9, ($8)\r\n"
- "daddu $8, %3, %7\r\n"
- "sb $9, ($8)\r\n"
- "daddu $8, %4, %6\r\n"
- "lb $9, ($8)\r\n"
- "daddu $8, %4, %7\r\n"
- "sb $9, ($8)\r\n"
- "daddu $8, %5, %6\r\n"
- "lb $9, ($8)\r\n"
- "daddu $8, %5, %7\r\n"
- "sb $9, ($8)\r\n"
+ asm(PTR_ADDU "$8, %3, %6\r\n"
+ "lb $9, ($8)\r\n"
+ PTR_ADDU "$8, %3, %7\r\n"
+ "sb $9, ($8)\r\n"
+ PTR_ADDU "$8, %4, %6\r\n"
+ "lb $9, ($8)\r\n"
+ PTR_ADDU "$8, %4, %7\r\n"
+ "sb $9, ($8)\r\n"
+ PTR_ADDU "$8, %5, %6\r\n"
+ "lb $9, ($8)\r\n"
+ PTR_ADDU "$8, %5, %7\r\n"
+ "sb $9, ($8)\r\n"
: "=m" (*inptr_1), "=m" (*inptr0), "=m" (*inptr1)
: "r" (inptr_1), "r" (inptr0), "r" (inptr1), "r" (tmp), "r" (tmp1)
: "$8", "$9"
if (downsampled_width & 7) {
tmp = (downsampled_width - 1) * sizeof(JSAMPLE);
tmp1 = downsampled_width * sizeof(JSAMPLE);
- asm("daddu $8, %1, %2\r\n"
- "lb $9, ($8)\r\n"
- "daddu $8, %1, %3\r\n"
- "sb $9, ($8)\r\n"
+ asm(PTR_ADDU "$8, %1, %2\r\n"
+ "lb $9, ($8)\r\n"
+ PTR_ADDU "$8, %1, %3\r\n"
+ "sb $9, ($8)\r\n"
: "=m" (*inptr0)
: "r" (inptr0), "r" (tmp), "r" (tmp1)
: "$8", "$9"
/* Common code */
+#if defined(_ABI64) && _MIPS_SIM == _ABI64  /* target ABI is _ABI64: use the doubleword mnemonics */
+# define PTR_ADDU "daddu "  /* add used on addresses; string is pasted before the operand text, hence the trailing space */
+# define PTR_SLL "dsll "  /* shift-left that scales an index before PTR_ADDU adds it to a base address */
+#else  /* any other ABI: use the word-sized mnemonics */
+# define PTR_ADDU "addu "  /* word-sized counterpart of "daddu " */
+# define PTR_SLL "sll "  /* word-sized counterpart of "dsll " */
+#endif  /* _ABI64 */
#define SIZEOF_MMWORD 8
#define BYTE_BIT 8