md5-sparcv9.pl: avoid %asi modifications, improve short input performance

author Andy Polyakov <appro@openssl.org>
Sun, 14 Oct 2012 16:51:27 +0000 (16:51 +0000)
committer Andy Polyakov <appro@openssl.org>
Sun, 14 Oct 2012 16:51:27 +0000 (16:51 +0000)
diff --git a/crypto/md5/asm/md5-sparcv9.pl b/crypto/md5/asm/md5-sparcv9.pl
index 062a0738c439536c9d601434d8b35d7769a77edb..ef16666cc34276f9b2030cbdef0a8b72d67c0dc9 100644
--- a/crypto/md5/asm/md5-sparcv9.pl
+++ b/crypto/md5/asm/md5-sparcv9.pl
@@ -12,7 +12,7 @@
  # MD5 for SPARCv9, 6.9 cycles per byte on UltraSPARC, >40% faster than
  # code generated by Sun C 5.2.
  
-# SPARC T4 MD5 hardware achieves 3.24 cycles per byte, which is 2.1x
+# SPARC T4 MD5 hardware achieves 3.20 cycles per byte, which is 2.1x
  # faster than software. Multi-process benchmark saturates at 12x
  # single-process result on 8-core processor, or ~11GBps per 2.85GHz
  # socket.
@@ -221,15 +221,15 @@ md5_block_asm_data_order:
         be      .Lsoftware
         nop
  
-       rd      %asi, %g5
-       wr      %g0, 0x88, %asi         ! ASI_PRIMARY_LITTLE
- 
-       lda     [%o0 + 0x00] %asi, %f0  ! load context
-       lda     [%o0 + 0x04] %asi, %f1
+       mov     4, %g1
         andcc   %o1, 0x7, %g0
-       lda     [%o0 + 0x08] %asi, %f2
+       lda     [%o0 + %g0]0x88, %f0            ! load context
+       lda     [%o0 + %g1]0x88, %f1
+       add     %o0, 8, %o0
+       lda     [%o0 + %g0]0x88, %f2
+       lda     [%o0 + %g1]0x88, %f3
         bne,pn  %icc, .Lhwunaligned
-        lda    [%o0 + 0x0c] %asi, %f3
+       sub     %o0, 8, %o0
  
  .Lhw_loop:
         ldd     [%o1 + 0x00], %f8
@@ -250,12 +250,13 @@ md5_block_asm_data_order:
         nop
  
  .Lhwfinish:
-       sta     %f0, [%o0 + 0x00] %asi  ! store context
-       sta     %f1, [%o0 + 0x04] %asi
-       sta     %f2, [%o0 + 0x08] %asi
-       sta     %f3, [%o0 + 0x0c] %asi
+       sta     %f0, [%o0 + %g0]0x88    ! store context
+       sta     %f1, [%o0 + %g1]0x88
+       add     %o0, 8, %o0
+       sta     %f2, [%o0 + %g0]0x88
+       sta     %f3, [%o0 + %g1]0x88
         retl
-        wr     %g5, 0x0, %asi          ! restore %asi
+       nop
  
  .align 8
  .Lhwunaligned:
author	Andy Polyakov <appro@openssl.org>
	Sun, 14 Oct 2012 16:51:27 +0000 (16:51 +0000)
committer	Andy Polyakov <appro@openssl.org>
	Sun, 14 Oct 2012 16:51:27 +0000 (16:51 +0000)