From: DRC Date: Wed, 27 Feb 2019 19:05:58 +0000 (-0600) Subject: MMI: Support 32-bit Loongson architectures X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=afbe48c2905019afd60d74673b7845e118ad7f28;p=libjpeg-turbo MMI: Support 32-bit Loongson architectures --- diff --git a/simd/loongson/jccolext-mmi.c b/simd/loongson/jccolext-mmi.c index 4b7f402..558eb2a 100644 --- a/simd/loongson/jccolext-mmi.c +++ b/simd/loongson/jccolext-mmi.c @@ -124,67 +124,67 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf, col = num_cols * 3; asm(".set noreorder\r\n" - "li $8, 1\r\n" - "move $9, %3\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 1f\r\n" - "nop \r\n" - "subu $9, $9, 1\r\n" - "xor $12, $12, $12\r\n" - "move $13, %5\r\n" - "dadd $13, $13, $9\r\n" - "lbu $12, 0($13)\r\n" - - "1: \r\n" - "li $8, 2\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 2f\r\n" - "nop \r\n" - "subu $9, $9, 2\r\n" - "xor $11, $11, $11\r\n" - "move $13, %5\r\n" - "dadd $13, $13, $9\r\n" - "lhu $11, 0($13)\r\n" - "sll $12, $12, 16\r\n" - "or $12, $12, $11\r\n" - - "2: \r\n" - "dmtc1 $12, %0\r\n" - "li $8, 4\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 3f\r\n" - "nop \r\n" - "subu $9, $9, 4\r\n" - "move $13, %5\r\n" - "dadd $13, $13, $9\r\n" - "lwu $14, 0($13)\r\n" - "dmtc1 $14, %1\r\n" - "dsll32 $12, $12, 0\r\n" - "or $12, $12, $14\r\n" - "dmtc1 $12, %0\r\n" - - "3: \r\n" - "li $8, 8\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 4f\r\n" - "nop \r\n" - "mov.s %1, %0\r\n" - "ldc1 %0, 0(%5)\r\n" - "li $9, 8\r\n" - "j 5f\r\n" - "nop \r\n" - - "4: \r\n" - "li $8, 16\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 5f\r\n" - "nop \r\n" - "mov.s %2, %0\r\n" - "ldc1 %0, 0(%5)\r\n" - "ldc1 %1, 8(%5)\r\n" - - "5: \r\n" - "nop \r\n" + "li $8, 1\r\n" + "move $9, %3\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 1f\r\n" + "nop \r\n" + "subu $9, $9, 1\r\n" + "xor $12, $12, $12\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $9\r\n" + "lbu $12, 0($13)\r\n" + + "1: \r\n" + "li $8, 2\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 2f\r\n" + "nop \r\n" + "subu $9, $9, 2\r\n" + "xor $11, $11, $11\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $9\r\n" + "lhu $11, 0($13)\r\n" + "sll $12, $12, 16\r\n" + "or $12, $12, $11\r\n" + + "2: \r\n" + "dmtc1 $12, %0\r\n" + "li $8, 4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 3f\r\n" + "nop \r\n" + "subu $9, $9, 4\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $9\r\n" + "lwu $14, 0($13)\r\n" + "dmtc1 $14, %1\r\n" + "dsll32 $12, $12, 0\r\n" + "or $12, $12, $14\r\n" + "dmtc1 $12, %0\r\n" + + "3: \r\n" + "li $8, 8\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 4f\r\n" + "nop \r\n" + "mov.s %1, %0\r\n" + "ldc1 %0, 0(%5)\r\n" + "li $9, 8\r\n" + "j 5f\r\n" + "nop \r\n" + + "4: \r\n" + "li $8, 16\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 5f\r\n" + "nop \r\n" + "mov.s %2, %0\r\n" + "ldc1 %0, 0(%5)\r\n" + "ldc1 %1, 8(%5)\r\n" + + "5: \r\n" + "nop \r\n" ".set reorder\r\n" : "=f" (mmA), "=f" (mmG), "=f" (mmF) @@ -236,41 +236,41 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf, col = num_cols; asm(".set noreorder\r\n" - "li $8, 1\r\n" - "move $9, %4\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 1f\r\n" - "nop \r\n" - "subu $9, $9, 1\r\n" - "dsll $11, $9, 2\r\n" - "move $13, %5\r\n" - "daddu $13, $13, $11\r\n" - "lwc1 %0, 0($13)\r\n" - - "1: \r\n" - "li $8, 2\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 2f\r\n" - "nop \r\n" - "subu $9, $9, 2\r\n" - "dsll $11, $9, 2\r\n" - "move $13, %5\r\n" - "daddu $13, $13, $11\r\n" - "mov.s %1, %0\r\n" - "ldc1 %0, 0($13)\r\n" - - "2: \r\n" - "li $8, 4\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 3f\r\n" - "nop \r\n" - "mov.s %2, %0\r\n" - "mov.s %3, %1\r\n" - "ldc1 %0, 0(%5)\r\n" - "ldc1 %1, 8(%5)\r\n" - - "3: \r\n" - "nop \r\n" + "li $8, 1\r\n" + "move $9, %4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 1f\r\n" + "nop \r\n" + "subu $9, $9, 1\r\n" + PTR_SLL "$11, $9, 2\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $11\r\n" + "lwc1 %0, 0($13)\r\n" + + "1: \r\n" + "li $8, 2\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 2f\r\n" + "nop \r\n" + "subu $9, $9, 2\r\n" + PTR_SLL "$11, $9, 2\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $11\r\n" + "mov.s %1, %0\r\n" + "ldc1 %0, 0($13)\r\n" + + "2: \r\n" + "li $8, 4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 3f\r\n" + "nop \r\n" + "mov.s %2, %0\r\n" + "mov.s %3, %1\r\n" + "ldc1 %0, 0(%5)\r\n" + "ldc1 %1, 8(%5)\r\n" + + "3: \r\n" + "nop \r\n" ".set reorder\r\n" : "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC) diff --git a/simd/loongson/jcgryext-mmi.c b/simd/loongson/jcgryext-mmi.c index 3d3c8f6..08a83d6 100644 --- a/simd/loongson/jcgryext-mmi.c +++ b/simd/loongson/jcgryext-mmi.c @@ -115,67 +115,67 @@ void jsimd_rgb_gray_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf, col = num_cols * 3; asm(".set noreorder\r\n" - "li $8, 1\r\n" - "move $9, %3\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 1f\r\n" - "nop \r\n" - "subu $9, $9, 1\r\n" - "xor $12, $12, $12\r\n" - "move $13, %5\r\n" - "dadd $13, $13, $9\r\n" - "lbu $12, 0($13)\r\n" - - "1: \r\n" - "li $8, 2\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 2f\r\n" - "nop \r\n" - "subu $9, $9, 2\r\n" - "xor $11, $11, $11\r\n" - "move $13, %5\r\n" - "dadd $13, $13, $9\r\n" - "lhu $11, 0($13)\r\n" - "sll $12, $12, 16\r\n" - "or $12, $12, $11\r\n" - - "2: \r\n" - "dmtc1 $12, %0\r\n" - "li $8, 4\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 3f\r\n" - "nop \r\n" - "subu $9, $9, 4\r\n" - "move $13, %5\r\n" - "dadd $13, $13, $9\r\n" - "lwu $14, 0($13)\r\n" - "dmtc1 $14, %1\r\n" - "dsll32 $12, $12, 0\r\n" - "or $12, $12, $14\r\n" - "dmtc1 $12, %0\r\n" - - "3: \r\n" - "li $8, 8\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 4f\r\n" - "nop \r\n" - "mov.s %1, %0\r\n" - "ldc1 %0, 0(%5)\r\n" - "li $9, 8\r\n" - "j 5f\r\n" - "nop \r\n" - - "4: \r\n" - "li $8, 16\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 5f\r\n" - "nop \r\n" - "mov.s %2, %0\r\n" - "ldc1 %0, 0(%5)\r\n" - "ldc1 %1, 8(%5)\r\n" - - "5: \r\n" - "nop \r\n" + "li $8, 1\r\n" + "move $9, %3\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 1f\r\n" + "nop \r\n" + "subu $9, $9, 1\r\n" + "xor $12, $12, $12\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $9\r\n" + "lbu $12, 0($13)\r\n" + + "1: \r\n" + "li $8, 2\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 2f\r\n" + "nop \r\n" + "subu $9, $9, 2\r\n" + "xor $11, $11, $11\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $9\r\n" + "lhu $11, 0($13)\r\n" + "sll $12, $12, 16\r\n" + "or $12, $12, $11\r\n" + + "2: \r\n" + "dmtc1 $12, %0\r\n" + "li $8, 4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 3f\r\n" + "nop \r\n" + "subu $9, $9, 4\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $9\r\n" + "lwu $14, 0($13)\r\n" + "dmtc1 $14, %1\r\n" + "dsll32 $12, $12, 0\r\n" + "or $12, $12, $14\r\n" + "dmtc1 $12, %0\r\n" + + "3: \r\n" + "li $8, 8\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 4f\r\n" + "nop \r\n" + "mov.s %1, %0\r\n" + "ldc1 %0, 0(%5)\r\n" + "li $9, 8\r\n" + "j 5f\r\n" + "nop \r\n" + + "4: \r\n" + "li $8, 16\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 5f\r\n" + "nop \r\n" + "mov.s %2, %0\r\n" + "ldc1 %0, 0(%5)\r\n" + "ldc1 %1, 8(%5)\r\n" + + "5: \r\n" + "nop \r\n" ".set reorder\r\n" : "=f" (mmA), "=f" (mmG), "=f" (mmF) @@ -227,41 +227,41 @@ void jsimd_rgb_gray_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf, col = num_cols; asm(".set noreorder\r\n" - "li $8, 1\r\n" - "move $9, %4\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 1f\r\n" - "nop \r\n" - "subu $9, $9, 1\r\n" - "dsll $11, $9, 2\r\n" - "move $13, %5\r\n" - "daddu $13, $13, $11\r\n" - "lwc1 %0, 0($13)\r\n" - - "1: \r\n" - "li $8, 2\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 2f\r\n" - "nop \r\n" - "subu $9, $9, 2\r\n" - "dsll $11, $9, 2\r\n" - "move $13, %5\r\n" - "daddu $13, $13, $11\r\n" - "mov.s %1, %0\r\n" - "ldc1 %0, 0($13)\r\n" - - "2: \r\n" - "li $8, 4\r\n" - "and $10, $9, $8\r\n" - "beqz $10, 3f\r\n" - "nop \r\n" - "mov.s %2, %0\r\n" - "mov.s %3, %1\r\n" - "ldc1 %0, 0(%5)\r\n" - "ldc1 %1, 8(%5)\r\n" - - "3: \r\n" - "nop \r\n" + "li $8, 1\r\n" + "move $9, %4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 1f\r\n" + "nop \r\n" + "subu $9, $9, 1\r\n" + PTR_SLL "$11, $9, 2\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $11\r\n" + "lwc1 %0, 0($13)\r\n" + + "1: \r\n" + "li $8, 2\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 2f\r\n" + "nop \r\n" + "subu $9, $9, 2\r\n" + PTR_SLL "$11, $9, 2\r\n" + "move $13, %5\r\n" + PTR_ADDU "$13, $13, $11\r\n" + "mov.s %1, %0\r\n" + "ldc1 %0, 0($13)\r\n" + + "2: \r\n" + "li $8, 4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 3f\r\n" + "nop \r\n" + "mov.s %2, %0\r\n" + "mov.s %3, %1\r\n" + "ldc1 %0, 0(%5)\r\n" + "ldc1 %1, 8(%5)\r\n" + + "3: \r\n" + "nop \r\n" ".set reorder\r\n" : "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC) diff --git a/simd/loongson/jdcolext-mmi.c b/simd/loongson/jdcolext-mmi.c index 8a159ef..3b5b2f2 100644 --- a/simd/loongson/jdcolext-mmi.c +++ b/simd/loongson/jdcolext-mmi.c @@ -247,64 +247,64 @@ void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf, col = num_cols * 3; asm(".set noreorder\r\n" - "li $8, 16\r\n" - "move $9, %4\r\n" - "mov.s $f4, %1\r\n" - "mov.s $f6, %3\r\n" - "move $10, %5\r\n" - "bltu $9, $8, 1f\r\n" - "nop \r\n" - "gssdlc1 $f4, 7($10)\r\n" - "gssdrc1 $f4, 0($10)\r\n" - "gssdlc1 $f6, 7+8($10)\r\n" - "gssdrc1 $f6, 8($10)\r\n" - "mov.s $f4, %2\r\n" - "subu $9, $9, 16\r\n" - "daddu $10, $10, 16\r\n" - "b 2f\r\n" - "nop \r\n" - - "1: \r\n" - "li $8, 8\r\n" /* st8 */ - "bltu $9, $8, 2f\r\n" - "nop \r\n" - "gssdlc1 $f4, 7($10)\r\n" - "gssdrc1 $f4, ($10)\r\n" - "mov.s $f4, %3\r\n" - "subu $9, $9, 8\r\n" - "daddu $10, $10, 8\r\n" - - "2: \r\n" - "li $8, 4\r\n" /* st4 */ - "mfc1 $11, $f4\r\n" - "bltu $9, $8, 3f\r\n" - "nop \r\n" - "swl $11, 3($10)\r\n" - "swr $11, 0($10)\r\n" - "li $8, 32\r\n" - "mtc1 $8, $f6\r\n" - "dsrl $f4, $f4, $f6\r\n" - "mfc1 $11, $f4\r\n" - "subu $9, $9, 4\r\n" - "daddu $10, $10, 4\r\n" - - "3: \r\n" - "li $8, 2\r\n" /* st2 */ - "bltu $9, $8, 4f\r\n" - "nop \r\n" - "ush $11, 0($10)\r\n" - "srl $11, 16\r\n" - "subu $9, $9, 2\r\n" - "daddu $10, $10, 2\r\n" - - "4: \r\n" - "li $8, 1\r\n" /* st1 */ - "bltu $9, $8, 5f\r\n" - "nop \r\n" - "sb $11, 0($10)\r\n" - - "5: \r\n" - "nop \r\n" /* end */ + "li $8, 16\r\n" + "move $9, %4\r\n" + "mov.s $f4, %1\r\n" + "mov.s $f6, %3\r\n" + "move $10, %5\r\n" + "bltu $9, $8, 1f\r\n" + "nop \r\n" + "gssdlc1 $f4, 7($10)\r\n" + "gssdrc1 $f4, 0($10)\r\n" + "gssdlc1 $f6, 7+8($10)\r\n" + "gssdrc1 $f6, 8($10)\r\n" + "mov.s $f4, %2\r\n" + "subu $9, $9, 16\r\n" + PTR_ADDU "$10, $10, 16\r\n" + "b 2f\r\n" + "nop \r\n" + + "1: \r\n" + "li $8, 8\r\n" /* st8 */ + "bltu $9, $8, 2f\r\n" + "nop \r\n" + "gssdlc1 $f4, 7($10)\r\n" + "gssdrc1 $f4, 0($10)\r\n" + "mov.s $f4, %3\r\n" + "subu $9, $9, 8\r\n" + PTR_ADDU "$10, $10, 8\r\n" + + "2: \r\n" + "li $8, 4\r\n" /* st4 */ + "mfc1 $11, $f4\r\n" + "bltu $9, $8, 3f\r\n" + "nop \r\n" + "swl $11, 3($10)\r\n" + "swr $11, 0($10)\r\n" + "li $8, 32\r\n" + "mtc1 $8, $f6\r\n" + "dsrl $f4, $f4, $f6\r\n" + "mfc1 $11, $f4\r\n" + "subu $9, $9, 4\r\n" + PTR_ADDU "$10, $10, 4\r\n" + + "3: \r\n" + "li $8, 2\r\n" /* st2 */ + "bltu $9, $8, 4f\r\n" + "nop \r\n" + "ush $11, 0($10)\r\n" + "srl $11, 16\r\n" + "subu $9, $9, 2\r\n" + PTR_ADDU "$10, $10, 2\r\n" + + "4: \r\n" + "li $8, 1\r\n" /* st1 */ + "bltu $9, $8, 5f\r\n" + "nop \r\n" + "sb $11, 0($10)\r\n" + + "5: \r\n" + "nop \r\n" /* end */ : "=m" (*outptr) : "f" (mmA), "f" (mmC), "f" (mmE), "r" (col), "r" (outptr) : "$f4", "$f6", "$8", "$9", "$10", "$11", "memory" @@ -357,41 +357,41 @@ void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf, col = num_cols; asm(".set noreorder\r\n" /* st16 */ - "li $8, 4\r\n" - "move $9, %6\r\n" - "move $10, %7\r\n" - "mov.s $f4, %2\r\n" - "mov.s $f6, %4\r\n" - "bltu $9, $8, 1f\r\n" - "nop \r\n" - "gssdlc1 $f4, 7($10)\r\n" - "gssdrc1 $f4, ($10)\r\n" - "gssdlc1 $f6, 7+8($10)\r\n" - "gssdrc1 $f6, 8($10)\r\n" - "mov.s $f4, %3\r\n" - "mov.s $f6, %5\r\n" - "subu $9, $9, 4\r\n" - "daddu $10, $10, 16\r\n" - - "1: \r\n" - "li $8, 2\r\n" /* st8 */ - "bltu $9, $8, 2f\r\n" - "nop \r\n" - "gssdlc1 $f4, 7($10)\r\n" - "gssdrc1 $f4, 0($10)\r\n" - "mov.s $f4, $f6\r\n" - "subu $9, $9, 2\r\n" - "daddu $10, $10, 8\r\n" - - "2: \r\n" - "li $8, 1\r\n" /* st4 */ - "bltu $9, $8, 3f\r\n" - "nop \r\n" - "gsswlc1 $f4, 3($10)\r\n" - "gsswrc1 $f4, 0($10)\r\n" - - "3: \r\n" - "li %1, 0\r\n" /* end */ + "li $8, 4\r\n" + "move $9, %6\r\n" + "move $10, %7\r\n" + "mov.s $f4, %2\r\n" + "mov.s $f6, %4\r\n" + "bltu $9, $8, 1f\r\n" + "nop \r\n" + "gssdlc1 $f4, 7($10)\r\n" + "gssdrc1 $f4, 0($10)\r\n" + "gssdlc1 $f6, 7+8($10)\r\n" + "gssdrc1 $f6, 8($10)\r\n" + "mov.s $f4, %3\r\n" + "mov.s $f6, %5\r\n" + "subu $9, $9, 4\r\n" + PTR_ADDU "$10, $10, 16\r\n" + + "1: \r\n" + "li $8, 2\r\n" /* st8 */ + "bltu $9, $8, 2f\r\n" + "nop \r\n" + "gssdlc1 $f4, 7($10)\r\n" + "gssdrc1 $f4, 0($10)\r\n" + "mov.s $f4, $f6\r\n" + "subu $9, $9, 2\r\n" + PTR_ADDU "$10, $10, 8\r\n" + + "2: \r\n" + "li $8, 1\r\n" /* st4 */ + "bltu $9, $8, 3f\r\n" + "nop \r\n" + "gsswlc1 $f4, 3($10)\r\n" + "gsswrc1 $f4, 0($10)\r\n" + + "3: \r\n" + "li %1, 0\r\n" /* end */ : "=m" (*outptr), "=r" (col) : "f" (mmA), "f" (mmC), "f" (mmD), "f" (mmH), "r" (col), "r" (outptr) diff --git a/simd/loongson/jdmrgext-mmi.c b/simd/loongson/jdmrgext-mmi.c index 667269b..be09ff2 100644 --- a/simd/loongson/jdmrgext-mmi.c +++ b/simd/loongson/jdmrgext-mmi.c @@ -296,7 +296,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "mov.s $f6, %5\r\n" "mov.s $f8, %6\r\n" "subu $9, $9, 24\r\n" - "daddu $10, $10, 24\r\n" + PTR_ADDU "$10, $10, 24\r\n" "1: \r\n" "li $8, 16\r\n" /* st16 */ @@ -308,7 +308,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "gssdrc1 $f6, 8($10)\r\n" "mov.s $f4, $f8\r\n" "subu $9, $9, 16\r\n" - "daddu $10, $10, 16\r\n" + PTR_ADDU "$10, $10, 16\r\n" "2: \r\n" "li $8, 8\r\n" /* st8 */ @@ -318,7 +318,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "gssdrc1 $f4, 0($10)\r\n" "mov.s $f4, $f6\r\n" "subu $9, $9, 8\r\n" - "daddu $10, $10, 8\r\n" + PTR_ADDU "$10, $10, 8\r\n" "3: \r\n" "li $8, 4\r\n" /* st4 */ @@ -332,7 +332,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "dsrl $f4, $f4, $f6\r\n" "mfc1 $11, $f4\r\n" "subu $9, $9, 4\r\n" - "daddu $10, $10, 4\r\n" + PTR_ADDU "$10, $10, 4\r\n" "4: \r\n" "li $8, 2\r\n" /* st2 */ @@ -341,7 +341,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "ush $11, 0($10)\r\n" "srl $11, 16\r\n" "subu $9, $9, 2\r\n" - "daddu $10, $10, 2\r\n" + PTR_ADDU "$10, $10, 2\r\n" "5: \r\n" "li $8, 1\r\n" /* st1 */ @@ -440,7 +440,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "bltu $9, $8, 1f\r\n" "nop \r\n" "gssdlc1 $f4, 7($10)\r\n" - "gssdrc1 $f4, ($10)\r\n" + "gssdrc1 $f4, 0($10)\r\n" "gssdlc1 $f6, 7+8($10)\r\n" "gssdrc1 $f6, 8($10)\r\n" "gssdlc1 $f8, 7+16($10)\r\n" @@ -452,7 +452,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "mov.s $f8, %8\r\n" "mov.s $f10, %9\r\n" "subu $9, $9, 8\r\n" - "daddu $10, $10, 32\r\n" + PTR_ADDU "$10, $10, 32\r\n" "1: \r\n" "li $8, 4\r\n" /* st16 */ @@ -465,7 +465,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "mov.s $f4, $f8\r\n" "mov.s $f6, $f10\r\n" "subu $9, $9, 4\r\n" - "daddu $10, $10, 16\r\n" + PTR_ADDU "$10, $10, 16\r\n" "2: \r\n" "li $8, 2\r\n" /* st8 */ @@ -475,7 +475,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width, "gssdrc1 $f4, 0($10)\r\n" "mov.s $f4, $f6\r\n" "subu $9, $9, 2\r\n" - "daddu $10, $10, 8\r\n" + PTR_ADDU "$10, $10, 8\r\n" "3: \r\n" "li $8, 1\r\n" /* st4 */ diff --git a/simd/loongson/jdsample-mmi.c b/simd/loongson/jdsample-mmi.c index 8255b60..8ae94e7 100644 --- a/simd/loongson/jdsample-mmi.c +++ b/simd/loongson/jdsample-mmi.c @@ -135,18 +135,18 @@ void jsimd_h2v2_fancy_upsample_mmi(int max_v_samp_factor, if (downsampled_width & 7) { tmp = (downsampled_width - 1) * sizeof(JSAMPLE); tmp1 = downsampled_width * sizeof(JSAMPLE); - asm("daddu $8, %3, %6\r\n" - "lb $9, ($8)\r\n" - "daddu $8, %3, %7\r\n" - "sb $9, ($8)\r\n" - "daddu $8, %4, %6\r\n" - "lb $9, ($8)\r\n" - "daddu $8, %4, %7\r\n" - "sb $9, ($8)\r\n" - "daddu $8, %5, %6\r\n" - "lb $9, ($8)\r\n" - "daddu $8, %5, %7\r\n" - "sb $9, ($8)\r\n" + asm(PTR_ADDU "$8, %3, %6\r\n" + "lb $9, ($8)\r\n" + PTR_ADDU "$8, %3, %7\r\n" + "sb $9, ($8)\r\n" + PTR_ADDU "$8, %4, %6\r\n" + "lb $9, ($8)\r\n" + PTR_ADDU "$8, %4, %7\r\n" + "sb $9, ($8)\r\n" + PTR_ADDU "$8, %5, %6\r\n" + "lb $9, ($8)\r\n" + PTR_ADDU "$8, %5, %7\r\n" + "sb $9, ($8)\r\n" : "=m" (*inptr_1), "=m" (*inptr0), "=m" (*inptr1) : "r" (inptr_1), "r" (inptr0), "r" (inptr1), "r" (tmp), "r" (tmp1) : "$8", "$9" @@ -262,10 +262,10 @@ void jsimd_h2v1_fancy_upsample_mmi(int max_v_samp_factor, if (downsampled_width & 7) { tmp = (downsampled_width - 1) * sizeof(JSAMPLE); tmp1 = downsampled_width * sizeof(JSAMPLE); - asm("daddu $8, %1, %2\r\n" - "lb $9, ($8)\r\n" - "daddu $8, %1, %3\r\n" - "sb $9, ($8)\r\n" + asm(PTR_ADDU "$8, %1, %2\r\n" + "lb $9, ($8)\r\n" + PTR_ADDU "$8, %1, %3\r\n" + "sb $9, ($8)\r\n" : "=m" (*inptr0) : "r" (inptr0), "r" (tmp), "r" (tmp1) : "$8", "$9" diff --git a/simd/loongson/jsimd_mmi.h b/simd/loongson/jsimd_mmi.h index c945905..a5ffc35 100644 --- a/simd/loongson/jsimd_mmi.h +++ b/simd/loongson/jsimd_mmi.h @@ -33,6 +33,13 @@ /* Common code */ +#if defined(_ABI64) && _MIPS_SIM == _ABI64 +# define PTR_ADDU "daddu " +# define PTR_SLL "dsll " +#else +# define PTR_ADDU "addu " +# define PTR_SLL "sll " +#endif #define SIZEOF_MMWORD 8 #define BYTE_BIT 8