From 0985473636b8bb998eb887c28489cfe5e57905a5 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Wed, 22 Sep 2010 08:43:09 +0000 Subject: [PATCH] sha1-mips.pl, mips-mont.pl: unify MIPS assembler modules in respect to ABI and binutils. --- crypto/bn/asm/mips-mont.pl | 419 ++++++++++++++++++++++++++++++++++++ crypto/sha/asm/sha1-mips.pl | 151 +++++++++---- 2 files changed, 529 insertions(+), 41 deletions(-) create mode 100644 crypto/bn/asm/mips-mont.pl diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl new file mode 100644 index 0000000000..e2395f4b5d --- /dev/null +++ b/crypto/bn/asm/mips-mont.pl @@ -0,0 +1,419 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# This module doesn't present direct interest for OpenSSL, because it +# doesn't provide better performance for longer keys. While 512-bit +# RSA private key operations are 40% faster, 1024-bit ones are hardly +# faster at all, while longer key operations are slower by up to 20%. +# It might be of interest to embedded system developers though, as +# it's smaller than 1KB, yet offers ~3x improvement over compiler +# generated code. + +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 + +if ($flavour =~ /64|n32/i) { + $PTR_ADD="dadd"; # incidentally works even on n32 + $PTR_SUB="dsub"; # incidentally works even on n32 + $REG_S="sd"; + $REG_L="ld"; + $SZREG=8; +} else { + $PTR_ADD="add"; + $PTR_SUB="sub"; + $REG_S="sw"; + $REG_L="lw"; + $SZREG=4; +} +$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000; +# +# +# +###################################################################### + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +if ($flavour =~ /64|n32/i) { + $LD="ld"; + $ST="sd"; + $MULTU="dmultu"; + $ADDU="daddu"; + $SUBU="dsubu"; + $BNSZ=8; +} else { + $LD="lw"; + $ST="sw"; + $MULTU="multu"; + $ADDU="addu"; + $SUBU="subu"; + $BNSZ=4; +} + +# int bn_mul_mont( +$rp=$a0; # BN_ULONG *rp, +$ap=$a1; # const BN_ULONG *ap, +$bp=$a2; # const BN_ULONG *bp, +$np=$a3; # const BN_ULONG *np, +$n0=$a4; # const BN_ULONG *n0, +$num=$a5; # int num); + +$lo0=$a6; +$hi0=$a7; +$lo1=$t1; +$hi1=$t2; +$aj=$s0; +$bi=$s1; +$nj=$s2; +$tp=$s3; +$alo=$s4; +$ahi=$s5; +$nlo=$s6; +$nhi=$s7; +$tj=$s8; +$i=$s9; +$j=$s10; +$m1=$s11; + +$FRAMESIZE=14; + +$code=<<___; +.text + +.set noat +.set noreorder + +.align 5 +.globl bn_mul_mont +.ent bn_mul_mont +bn_mul_mont: +___ +$code.=<<___ if ($flavour =~ /o32/i); + lw $n0,16($sp) + lw $num,20($sp) +___ +$code.=<<___; + slt $at,$num,4 + beqzl $at,bn_mul_mont_internal + li $t0,0 + jr $ra + li $a0,0 +.end bn_mul_mont + +.align 5 +.ent bn_mul_mont_internal +bn_mul_mont_internal: + .frame $fp,$FRAMESIZE*$SZREG,$ra + .mask 0x40000000|$SAVED_REGS_MASK,-$SZREG + $PTR_SUB $sp,$FRAMESIZE*$SZREG + $REG_S $fp,($FRAMESIZE-1)*$SZREG($sp) + $REG_S $s11,($FRAMESIZE-2)*$SZREG($sp) + $REG_S $s10,($FRAMESIZE-3)*$SZREG($sp) + $REG_S $s9,($FRAMESIZE-4)*$SZREG($sp) + $REG_S $s8,($FRAMESIZE-5)*$SZREG($sp) + $REG_S $s7,($FRAMESIZE-6)*$SZREG($sp) + $REG_S $s6,($FRAMESIZE-7)*$SZREG($sp) + $REG_S $s5,($FRAMESIZE-8)*$SZREG($sp) + $REG_S $s4,($FRAMESIZE-9)*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_S $s3,($FRAMESIZE-10)*$SZREG($sp) + $REG_S $s2,($FRAMESIZE-11)*$SZREG($sp) + $REG_S $s1,($FRAMESIZE-12)*$SZREG($sp) + $REG_S $s0,($FRAMESIZE-13)*$SZREG($sp) +___ +$code.=<<___; + move $fp,$sp + + .set reorder + $LD $n0,0($n0) + $LD $bi,0($bp) # bp[0] + $LD $aj,0($ap) # ap[0] + $LD $nj,0($np) # np[0] + + $PTR_SUB $sp,2*$BNSZ # place for two extra words + sll $num,`log($BNSZ)/log(2)` + li $at,-4096 + $PTR_SUB $sp,$num + and $sp,$at + + $MULTU $aj,$bi + $LD $alo,$BNSZ($ap) + $LD $nlo,$BNSZ($np) + mflo $lo0 + mfhi $hi0 + $MULTU $lo0,$n0 + mflo $m1 + + $MULTU $alo,$bi + mflo $alo + mfhi $ahi + + $MULTU $nj,$m1 + mflo $lo1 + mfhi $hi1 + $MULTU $nlo,$m1 + $ADDU $lo1,$lo0 + sltu $at,$lo1,$lo0 + $ADDU $hi1,$at + mflo $nlo + mfhi $nhi + + move $tp,$sp + li $j,2*$BNSZ +.align 4 +.L1st: + .set noreorder + $PTR_ADD $aj,$ap,$j + $PTR_ADD $nj,$np,$j + $LD $aj,($aj) + $LD $nj,($nj) + + $MULTU $aj,$bi + $ADDU $lo0,$alo,$hi0 + $ADDU $lo1,$nlo,$hi1 + sltu $at,$lo0,$hi0 + sltu $t0,$lo1,$hi1 + $ADDU $hi0,$ahi,$at + $ADDU $hi1,$nhi,$t0 + mflo $alo + mfhi $ahi + + $ADDU $lo1,$lo0 + sltu $at,$lo1,$lo0 + $MULTU $nj,$m1 + $ADDU $hi1,$at + addu $j,$BNSZ + $ST $lo1,($tp) + sltu $t0,$j,$num + mflo $nlo + mfhi $nhi + + bnez $t0,.L1st + $PTR_ADD $tp,$BNSZ + .set reorder + + $ADDU $lo0,$alo,$hi0 + sltu $at,$lo0,$hi0 + $ADDU $hi0,$ahi,$at + + $ADDU $lo1,$nlo,$hi1 + sltu $t0,$lo1,$hi1 + $ADDU $hi1,$nhi,$t0 + $ADDU $lo1,$lo0 + sltu $at,$lo1,$lo0 + $ADDU $hi1,$at + + $ST $lo1,($tp) + + $ADDU $hi1,$hi0 + sltu $at,$hi1,$hi0 + $ST $hi1,$BNSZ($tp) + $ST $at,2*$BNSZ($tp) + + li $i,$BNSZ +.align 4 +.Louter: + $PTR_ADD $bi,$bp,$i + $LD $bi,($bi) + $LD $aj,($ap) + $LD $alo,$BNSZ($ap) + $LD $tj,($sp) + + $MULTU $aj,$bi + $LD $nj,($np) + $LD $nlo,$BNSZ($np) + mflo $lo0 + mfhi $hi0 + $ADDU $lo0,$tj + $MULTU $lo0,$n0 + sltu $at,$lo0,$tj + $ADDU $hi0,$at + mflo $m1 + + $MULTU $alo,$bi + mflo $alo + mfhi $ahi + + $MULTU $nj,$m1 + mflo $lo1 + mfhi $hi1 + + $MULTU $nlo,$m1 + $ADDU $lo1,$lo0 + sltu $at,$lo1,$lo0 + $ADDU $hi1,$at + mflo $nlo + mfhi $nhi + + move $tp,$sp + li $j,2*$BNSZ + $LD $tj,$BNSZ($tp) +.align 4 +.Linner: + .set noreorder + $PTR_ADD $aj,$ap,$j + $PTR_ADD $nj,$np,$j + $LD $aj,($aj) + $LD $nj,($nj) + + $MULTU $aj,$bi + $ADDU $lo0,$alo,$hi0 + $ADDU $lo1,$nlo,$hi1 + sltu $at,$lo0,$hi0 + sltu $t0,$lo1,$hi1 + $ADDU $hi0,$ahi,$at + $ADDU $hi1,$nhi,$t0 + mflo $alo + mfhi $ahi + + $ADDU $lo0,$tj + addu $j,$BNSZ + $MULTU $nj,$m1 + sltu $at,$lo0,$tj + $ADDU $lo1,$lo0 + $ADDU $hi0,$at + sltu $t0,$lo1,$lo0 + $LD $tj,2*$BNSZ($tp) + $ADDU $hi1,$t0 + sltu $at,$j,$num + mflo $nlo + mfhi $nhi + $ST $lo1,($tp) + bnez $at,.Linner + $PTR_ADD $tp,$BNSZ + .set reorder + + $ADDU $lo0,$alo,$hi0 + sltu $at,$lo0,$hi0 + $ADDU $hi0,$ahi,$at + $ADDU $lo0,$tj + sltu $t0,$lo0,$tj + $ADDU $hi0,$t0 + + $LD $tj,2*$BNSZ($tp) + $ADDU $lo1,$nlo,$hi1 + sltu $at,$lo1,$hi1 + $ADDU $hi1,$nhi,$at + $ADDU $lo1,$lo0 + sltu $t0,$lo1,$lo0 + $ADDU $hi1,$t0 + $ST $lo1,($tp) + + $ADDU $lo1,$hi1,$hi0 + sltu $hi1,$lo1,$hi0 + $ADDU $lo1,$tj + sltu $at,$lo1,$tj + $ADDU $hi1,$at + $ST $lo1,$BNSZ($tp) + $ST $hi1,2*$BNSZ($tp) + + addu $i,$BNSZ + sltu $t0,$i,$num + bnez $t0,.Louter + + .set noreorder + $PTR_ADD $tj,$sp,$num # &tp[num] + move $tp,$sp + move $ap,$sp + li $hi0,0 # clear borrow bit + +.align 4 +.Lsub: $LD $lo0,($tp) + $LD $lo1,($np) + $PTR_ADD $tp,$BNSZ + $PTR_ADD $np,$BNSZ + $SUBU $lo1,$lo0,$lo1 # tp[i]-np[i] + sgtu $at,$lo1,$lo0 + $SUBU $lo0,$lo1,$hi0 + sgtu $hi0,$lo0,$lo1 + $ST $lo0,($rp) + or $hi0,$at + sltu $at,$tp,$tj + bnez $at,.Lsub + $PTR_ADD $rp,$BNSZ + + $SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit + move $tp,$sp + $PTR_SUB $rp,$num # restore rp + not $hi1,$hi0 + + and $ap,$hi0,$sp + and $bp,$hi1,$rp + or $ap,$ap,$bp # ap=borrow?tp:rp + +.align 4 +.Lcopy: $LD $aj,($ap) + $PTR_ADD $ap,$BNSZ + $ST $zero,($tp) + $PTR_ADD $tp,$BNSZ + sltu $at,$tp,$tj + $ST $aj,($rp) + bnez $at,.Lcopy + $PTR_ADD $rp,$BNSZ + + li $a0,1 + li $t0,1 + + .set noreorder + move $sp,$fp + $REG_L $fp,($FRAMESIZE-1)*$SZREG($sp) + $REG_L $s11,($FRAMESIZE-2)*$SZREG($sp) + $REG_L $s10,($FRAMESIZE-3)*$SZREG($sp) + $REG_L $s9,($FRAMESIZE-4)*$SZREG($sp) + $REG_L $s8,($FRAMESIZE-5)*$SZREG($sp) + $REG_L $s7,($FRAMESIZE-6)*$SZREG($sp) + $REG_L $s6,($FRAMESIZE-7)*$SZREG($sp) + $REG_L $s5,($FRAMESIZE-8)*$SZREG($sp) + $REG_L $s4,($FRAMESIZE-9)*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,($FRAMESIZE-10)*$SZREG($sp) + $REG_L $s2,($FRAMESIZE-11)*$SZREG($sp) + $REG_L $s1,($FRAMESIZE-12)*$SZREG($sp) + $REG_L $s0,($FRAMESIZE-13)*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE*$SZREG +.end bn_mul_mont_internal +.rdata +.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by " +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; + +print $code; +close STDOUT; diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl index a2f7aaddc1..75a6fb49c6 100644 --- a/crypto/sha/asm/sha1-mips.pl +++ b/crypto/sha/asm/sha1-mips.pl @@ -14,12 +14,62 @@ # vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32- # compatible subroutine. There is room for minor optimization on # little-endian platforms... + +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 + +if ($flavour =~ /64|n32/i) { + $PTR_ADD="dadd"; # incidentally works even on n32 + $PTR_SUB="dsub"; # incidentally works even on n32 + $REG_S="sd"; + $REG_L="ld"; + $PTR_SLL="dsll"; # incidentally works even on n32 + $SZREG=8; +} else { + $PTR_ADD="add"; + $PTR_SUB="sub"; + $REG_S="sw"; + $REG_L="lw"; + $PTR_SLL="sll"; + $SZREG=4; +} +$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff008 : 0x00ff0000; +# +# # -# The code is somewhat IRIX-centric, i.e. is likely to require minor -# adaptations for other OSes... +###################################################################### + +for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/); + $big_endian=0 if (/\-DL_ENDIAN/); + $output=$_ if (/^\w[\w\-]*\.\w+$/); } +open STDOUT,">$output"; -for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/); - $big_endian=0 if (/\-DL_ENDIAN/); } if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); } @@ -37,9 +87,9 @@ $B="\$2"; $C="\$3"; $D="\$7"; $E="\$24"; @V=($A,$B,$C,$D,$E); -$t0="\$25"; # jp,t9 -$t1="\$28"; # gp -$t2="\$30"; # fp,s8 +$t0="\$25"; +$t1=$num; # $num is offloaded to stack +$t2="\$30"; # fp $K="\$31"; # ra $FRAMESIZE=16; @@ -187,9 +237,6 @@ ___ } $code=<<___; -#include -#include - .text .set noat @@ -198,22 +245,32 @@ $code=<<___; .globl sha1_block_data_order .ent sha1_block_data_order sha1_block_data_order: - .frame sp,$FRAMESIZE*SZREG,zero - .mask 0xd0ff0000,-$FRAMESIZE*SZREG + .frame $sp,$FRAMESIZE*$SZREG,$ra + .mask 0xd0000000|$SAVED_REGS_MASK,-$SZREG .set noreorder - PTR_SUB sp,$FRAMESIZE*SZREG - REG_S \$31,($FRAMESIZE-1)*SZREG(sp) - REG_S \$30,($FRAMESIZE-2)*SZREG(sp) - REG_S \$28,($FRAMESIZE-3)*SZREG(sp) - REG_S \$23,($FRAMESIZE-4)*SZREG(sp) - REG_S \$22,($FRAMESIZE-5)*SZREG(sp) - REG_S \$21,($FRAMESIZE-6)*SZREG(sp) - REG_S \$20,($FRAMESIZE-7)*SZREG(sp) - REG_S \$19,($FRAMESIZE-8)*SZREG(sp) - REG_S \$18,($FRAMESIZE-9)*SZREG(sp) - REG_S \$17,($FRAMESIZE-10)*SZREG(sp) - REG_S \$16,($FRAMESIZE-11)*SZREG(sp) - + $PTR_SUB $sp,$FRAMESIZE*$SZREG + $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp) + $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp) + $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp) + $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp) + $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp) + $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp) + $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp) + $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp) + $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp) + $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp) + $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp) + $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp) + $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp) + $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp) +___ +$code.=<<___; + $PTR_SLL $num,6 + $PTR_ADD $num,$inp + $REG_S $num,0($sp) lw $A,0($ctx) lw $B,4($ctx) lw $C,8($ctx) @@ -246,6 +303,9 @@ $code.=<<___; ___ for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=<<___; + $PTR_ADD $inp,64 + $REG_L $num,0($sp) + addu $A,$X[0] addu $B,$X[1] sw $A,0($ctx) @@ -253,29 +313,38 @@ $code.=<<___; addu $D,$X[3] sw $B,4($ctx) addu $E,$X[4] - PTR_SUB $num,1 sw $C,8($ctx) sw $D,12($ctx) sw $E,16($ctx) .set noreorder - bnez $num,.Loop - PTR_ADD $inp,64 + bne $inp,$num,.Loop + nop .set noreorder - REG_L \$31,($FRAMESIZE-1)*SZREG(sp) - REG_L \$30,($FRAMESIZE-2)*SZREG(sp) - REG_L \$28,($FRAMESIZE-3)*SZREG(sp) - REG_L \$23,($FRAMESIZE-4)*SZREG(sp) - REG_L \$22,($FRAMESIZE-5)*SZREG(sp) - REG_L \$21,($FRAMESIZE-6)*SZREG(sp) - REG_L \$20,($FRAMESIZE-7)*SZREG(sp) - REG_L \$19,($FRAMESIZE-8)*SZREG(sp) - REG_L \$18,($FRAMESIZE-9)*SZREG(sp) - REG_L \$17,($FRAMESIZE-10)*SZREG(sp) - REG_L \$16,($FRAMESIZE-11)*SZREG(sp) - jr ra - PTR_ADD sp,$FRAMESIZE*SZREG + $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp) + $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp) + $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp) + $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp) + $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp) + $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp) + $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp) + $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp) + $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp) + $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp) + $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp) + $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp) + $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp) + $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE*$SZREG .end sha1_block_data_order +.rdata +.asciiz "SHA1 for MIPS, CRYPTOGAMS by " ___ print $code; close STDOUT; -- 2.40.0