SPARCv9 assembly pack: harmonize ABI handling (so that it's handled in one place).

author Andy Polyakov <appro@openssl.org>
Thu, 25 Oct 2012 12:07:32 +0000 (12:07 +0000)
committer Andy Polyakov <appro@openssl.org>
Thu, 25 Oct 2012 12:07:32 +0000 (12:07 +0000)
diff --git a/crypto/bn/asm/sparcv9-gf2m.pl b/crypto/bn/asm/sparcv9-gf2m.pl

index 04b9edde88d1ef3aac50fc0b7bb2c2d51b2c2d9e..ab94cd917c41a01642198e75b05cdf3b39f88d8f 100644 (file)
--- a/crypto/bn/asm/sparcv9-gf2m.pl
+++ b/crypto/bn/asm/sparcv9-gf2m.pl
@@ -18,23 +18,8 @@
  # ~100-230% faster than gcc-generated code and ~35-90% faster than
  # the pure SPARCv9 code path.
  
-$bits=32;
-for (@ARGV)     { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64)  { $bias=2047; $frame=192; }
-else            { $bias=0;    $frame=112; }
-
  $locals=16*8;
  
-$code.=<<___;
-#include <sparc_arch.h>
-
-.section        ".text",#alloc,#execinstr
-___
-$code.=<<___ if ($bits==64);
-.register       %g2,#scratch
-.register       %g3,#scratch
-___
-
  $tab="%l0";
  
  @T=("%g2","%g3");
@@ -44,6 +29,13 @@ $tab="%l0";
  ($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
  
  $code.=<<___;
+#include <sparc_arch.h>
+
+#ifdef __arch64__
+.register      %g2,#scratch
+.register      %g3,#scratch
+#endif
+
  #ifdef __PIC__
  SPARC_PIC_THUNK(%g1)
  #endif
@@ -74,7 +66,7 @@ bn_GF2m_mul_2x2:
  
  .align 16
  .Lsoftware:
-       save    %sp,-$frame-$locals,%sp
+       save    %sp,-STACK_FRAME-$locals,%sp
  
         sllx    %i1,32,$a
         mov     -1,$a12
@@ -83,7 +75,7 @@ bn_GF2m_mul_2x2:
         srlx    $a12,1,$a48                     ! 0x7fff...
         or      %i4,$b,$b
         srlx    $a12,2,$a12                     ! 0x3fff...
-       add     %sp,$bias+$frame,$tab
+       add     %sp,STACK_BIAS+STACK_FRAME,$tab
  
         sllx    $a,2,$a4
         mov     $a,$a1
diff --git a/crypto/md5/asm/md5-sparcv9.pl b/crypto/md5/asm/md5-sparcv9.pl

index ef16666cc34276f9b2030cbdef0a8b72d67c0dc9..407da3c1b0df2417f405166510919d7948820557 100644 (file)
--- a/crypto/md5/asm/md5-sparcv9.pl
+++ b/crypto/md5/asm/md5-sparcv9.pl
@@ -17,11 +17,6 @@
  # single-process result on 8-core processor, or ~11GBps per 2.85GHz
  # socket.
  
-$bits=32;
-for (@ARGV)    { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else           { $bias=0;    $frame=112; }
-
  $output=shift;
  open STDOUT,">$output";
  
@@ -198,13 +193,14 @@ $code.=<<___;
  ___
  }
  
-$code.=<<___ if ($bits==64);
-.register      %g2,#scratch
-.register      %g3,#scratch
-___
  $code.=<<___;
  #include "sparc_arch.h"
  
+#ifdef __arch64__
+.register      %g2,#scratch
+.register      %g3,#scratch
+#endif
+
  .section       ".text",#alloc,#execinstr
  
  #ifdef __PIC__
@@ -246,7 +242,7 @@ md5_block_asm_data_order:
  
         .word   0x81b02800              ! MD5
  
-       bne,pt  `$bits==64?"%xcc":"%icc"`, .Lhw_loop
+       bne,pt  SIZE_T_CC, .Lhw_loop
         nop
  
  .Lhwfinish:
@@ -287,7 +283,7 @@ md5_block_asm_data_order:
  
         .word   0x81b02800              ! MD5
  
-       bne,pt  `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+       bne,pt  SIZE_T_CC, .Lhwunaligned_loop
         for     %f26, %f26, %f10        ! %f10=%f26
  
         ba      .Lhwfinish
@@ -295,7 +291,7 @@ md5_block_asm_data_order:
  
  .align 16
  .Lsoftware:
-       save    %sp,-$frame,%sp
+       save    %sp,-STACK_FRAME,%sp
  
         rd      %asi,$saved_asi
         wr      %g0,0x88,%asi           ! ASI_PRIMARY_LITTLE
@@ -355,7 +351,7 @@ $code.=<<___;
         add     $t2,$C,$C
         add     $CD,$D,$D
         srl     $B,0,$B                 ! clruw $B
-       bne     `$bits==64?"%xcc":"%icc"`,.Loop
+       bne     SIZE_T_CC,.Loop
         srl     $D,0,$D                 ! clruw $D
  
         st      $A,[$ctx+0]             ! write out ctx
diff --git a/crypto/sha/asm/sha1-sparcv9.pl b/crypto/sha/asm/sha1-sparcv9.pl

index 47a82d3267247c57bfcff475a4d1bcdfca1d564c..b5efcde5c13962f37c0a7aad65269969759154c0 100644 (file)
--- a/crypto/sha/asm/sha1-sparcv9.pl
+++ b/crypto/sha/asm/sha1-sparcv9.pl
@@ -25,11 +25,6 @@
  # single-process result on 8-core processor, or ~9GBps per 2.85GHz
  # socket.
  
-$bits=32;
-for (@ARGV)    { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else           { $bias=0;    $frame=112; }
-
  $output=shift;
  open STDOUT,">$output";
  
@@ -185,13 +180,14 @@ $code.=<<___;
  ___
  }
  
-$code.=<<___ if ($bits==64);
-.register      %g2,#scratch
-.register      %g3,#scratch
-___
  $code.=<<___;
  #include "sparc_arch.h"
  
+#ifdef __arch64__
+.register      %g2,#scratch
+.register      %g3,#scratch
+#endif
+
  .section       ".text",#alloc,#execinstr
  
  #ifdef __PIC__
@@ -231,7 +227,7 @@ sha1_block_data_order:
  
         .word   0x81b02820              ! SHA1
  
-       bne,pt  `$bits==64?"%xcc":"%icc"`, .Lhw_loop
+       bne,pt  SIZE_T_CC, .Lhw_loop
         nop
  
  .Lhwfinish:
@@ -271,7 +267,7 @@ sha1_block_data_order:
  
         .word   0x81b02820              ! SHA1
  
-       bne,pt  `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+       bne,pt  SIZE_T_CC, .Lhwunaligned_loop
         for     %f26, %f26, %f10        ! %f10=%f26
  
         ba      .Lhwfinish
@@ -279,7 +275,7 @@ sha1_block_data_order:
  
  .align 16
  .Lsoftware:
-       save    %sp,-$frame,%sp
+       save    %sp,-STACK_FRAME,%sp
         sllx    $len,6,$len
         add     $inp,$len,$len
  
@@ -359,7 +355,7 @@ $code.=<<___;
         add     $E,@X[4],$E
         st      $E,[$ctx+16]
  
-       bne     `$bits==64?"%xcc":"%icc"`,.Lloop
+       bne     SIZE_T_CC,.Lloop
         andn    $inp,7,$tmp0
  
         ret
diff --git a/crypto/sha/asm/sha512-sparcv9.pl b/crypto/sha/asm/sha512-sparcv9.pl

index 4c749a5c8f387891ce8c27889875f53500d58a14..5a9c15d1d34abecc205ad6a1ab49a07d2da6a5df 100644 (file)
--- a/crypto/sha/asm/sha512-sparcv9.pl
+++ b/crypto/sha/asm/sha512-sparcv9.pl
@@ -49,12 +49,6 @@
  # saturates at 11.5x single-process result on 8-core processor, or
  # ~11/16GBps per 2.85GHz socket.
  
-
-$bits=32;
-for (@ARGV)    { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else           { $bias=0;    $frame=112; }
-
  $output=shift;
  open STDOUT,">$output";
  
@@ -191,29 +185,29 @@ $code.=<<___ if ($i<15);
         or      @pair[1],$tmp2,$tmp2
         `"ld    [$inp+".eval(32+4+$i*8)."],@pair[1]"    if ($i<12)`
         add     $h,$tmp2,$T1
-       $ST     $tmp2,[%sp+`$bias+$frame+$i*$SZ`]
+       $ST     $tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`]
  ___
  $code.=<<___ if ($i==12);
         bnz,a,pn        %icc,.+8
         ld      [$inp+128],%l0
  ___
  $code.=<<___ if ($i==15);
-       ld      [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2
         sllx    @pair[1],$tmp31,$tmp2   ! Xload($i)
         add     $tmp31,32,$tmp0
-       ld      [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3
         sllx    @pair[0],$tmp0,$tmp1
-       ld      [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4
         srlx    @pair[2],$tmp32,@pair[1]
         or      $tmp1,$tmp2,$tmp2
-       ld      [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5
         or      @pair[1],$tmp2,$tmp2
-       ld      [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6
         add     $h,$tmp2,$T1
-       $ST     $tmp2,[%sp+`$bias+$frame+$i*$SZ`]
-       ld      [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
-       ld      [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
-       ld      [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
+       $ST     $tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`]
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1
  ___
  } if ($SZ==8);
  
@@ -349,9 +343,9 @@ $code.=<<___;
         or      %l3,$tmp0,$tmp0
  
         srlx    $tmp0,@sigma0[0],$T1
-       ld      [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2
         sllx    $tmp0,`64-@sigma0[2]`,$tmp1
-       ld      [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3
         srlx    $tmp0,@sigma0[1],$tmp0
         xor     $tmp1,$T1,$T1
         sllx    $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
@@ -363,9 +357,9 @@ $code.=<<___;
         or      %l7,$tmp2,$tmp2
  
         srlx    $tmp2,@sigma1[0],$tmp1
-       ld      [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6
         sllx    $tmp2,`64-@sigma1[2]`,$tmp0
-       ld      [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7
         srlx    $tmp2,@sigma1[1],$tmp2
         xor     $tmp0,$tmp1,$tmp1
         sllx    $tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0
@@ -374,29 +368,30 @@ $code.=<<___;
         xor     $tmp0,$tmp1,$tmp1
         sllx    %l4,32,$tmp0
         xor     $tmp2,$tmp1,$tmp1       ! sigma1(X[$i+14])
-       ld      [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4
         or      %l5,$tmp0,$tmp0
-       ld      [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5
  
         sllx    %l0,32,$tmp2
         add     $tmp1,$T1,$T1
-       ld      [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0
         or      %l1,$tmp2,$tmp2
         add     $tmp0,$T1,$T1           ! +=X[$i+9]
-       ld      [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1
         add     $tmp2,$T1,$T1           ! +=X[$i]
-       $ST     $T1,[%sp+`$bias+$frame+($i%16)*$SZ`]
+       $ST     $T1,[%sp+STACK_BIAS+STACK_FRAME+`($i%16)*$SZ`]
  ___
      &BODY_00_15(@_);
  } if ($SZ==8);
  
-$code.=<<___ if ($bits==64);
-.register      %g2,#scratch
-.register      %g3,#scratch
-___
  $code.=<<___;
  #include "sparc_arch.h"
  
+#ifdef __arch64__
+.register      %g2,#scratch
+.register      %g3,#scratch
+#endif
+
  .section       ".text",#alloc,#execinstr
  
  .align 64
@@ -519,7 +514,7 @@ $code.=<<___ if ($SZ==8);           # SHA512
  
         .word   0x81b02860              ! SHA512
  
-       bne,pt  `$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop
+       bne,pt  SIZE_T_CC, .Lhwaligned_loop
         nop
  
  .Lhwfinish:
@@ -579,7 +574,7 @@ $code.=<<___ if ($SZ==8);           # SHA512
  
         .word   0x81b02860              ! SHA512
  
-       bne,pt  `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+       bne,pt  SIZE_T_CC, .Lhwunaligned_loop
         for     %f50, %f50, %f18        ! %f18=%f50
  
         ba      .Lhwfinish
@@ -612,7 +607,7 @@ $code.=<<___ if ($SZ==4);           # SHA256
  
         .word   0x81b02840              ! SHA256
  
-       bne,pt  `$bits==64?"%xcc":"%icc"`, .Lhwloop
+       bne,pt  SIZE_T_CC, .Lhwloop
         nop
  
  .Lhwfinish:
@@ -655,7 +650,7 @@ $code.=<<___ if ($SZ==4);           # SHA256
  
         .word   0x81b02840              ! SHA256
  
-       bne,pt  `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+       bne,pt  SIZE_T_CC, .Lhwunaligned_loop
         for     %f26, %f26, %f10        ! %f10=%f26
  
         ba      .Lhwfinish
@@ -664,7 +659,7 @@ ___
  $code.=<<___;
  .align 16
  .Lsoftware:
-       save    %sp,`-$frame-$locals`,%sp
+       save    %sp,-STACK_FRAME-$locals,%sp
         and     $inp,`$align-1`,$tmp31
         sllx    $len,`log(16*$SZ)/log(2)`,$len
         andn    $inp,`$align-1`,$inp
@@ -783,7 +778,7 @@ ___
  $code.=<<___;
         add     $inp,`16*$SZ`,$inp              ! advance inp
         cmp     $inp,$len
-       bne     `$bits==64?"%xcc":"%icc"`,.Lloop
+       bne     SIZE_T_CC,.Lloop
         sub     $Ktbl,`($rounds-16)*$SZ`,$Ktbl  ! rewind Ktbl
  
         ret
diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h

index 1a8fca95b894c7a598b027084c6a3d8939cdd8f3..1afef4b8a486dc36516899600c9868e7ea28628d 100644 (file)
--- a/crypto/sparc_arch.h
+++ b/crypto/sparc_arch.h
@@ -32,6 +32,10 @@
  # define __PIC__
  #endif
  
+#if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__)
+# define __arch64__
+#endif
+
  #define SPARC_PIC_THUNK(reg)   \
         .align  32;             \
  .Lpic_thunk:                   \
@@ -53,18 +57,23 @@
         add     %o7, reg, reg
  #endif
  
-#if    (defined(__GNUC__) && defined(__arch64__)) || \
-       (defined(__SUNPRO_C) && defined(__sparcv9))
+#if defined(__arch64__)
  
  # define SPARC_LOAD_ADDRESS(SYM, reg)  \
         setx    SYM, %o7, reg;
-# define LDPTR ldx
+# define LDPTR         ldx
+# define SIZE_T_CC     %xcc
+# define STACK_FRAME   192
+# define STACK_BIAS    2047
  
  #else
  
  # define SPARC_LOAD_ADDRESS(SYM, reg)  \
         set     SYM, reg;
-# define LDPTR ld
+# define LDPTR         ld
+# define SIZE_T_CC     %icc
+# define STACK_FRAME   112
+# define STACK_BIAS    0
  # define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg)
  
  #endif
author	Andy Polyakov <appro@openssl.org>
	Thu, 25 Oct 2012 12:07:32 +0000 (12:07 +0000)
committer	Andy Polyakov <appro@openssl.org>
	Thu, 25 Oct 2012 12:07:32 +0000 (12:07 +0000)
crypto/bn/asm/sparcv9-gf2m.pl		patch | blob | history
crypto/md5/asm/md5-sparcv9.pl		patch | blob | history
crypto/sha/asm/sha1-sparcv9.pl		patch | blob | history
crypto/sha/asm/sha512-sparcv9.pl		patch | blob | history
crypto/sparc_arch.h		patch | blob | history