Fix s390x bugs and correct performance coefficients.

author Andy Polyakov <appro@openssl.org>

Wed, 2 May 2007 11:44:02 +0000 (11:44 +0000)

committer Andy Polyakov <appro@openssl.org>

Wed, 2 May 2007 11:44:02 +0000 (11:44 +0000)
author Andy Polyakov <appro@openssl.org>
Wed, 2 May 2007 11:44:02 +0000 (11:44 +0000)
committer Andy Polyakov <appro@openssl.org>
Wed, 2 May 2007 11:44:02 +0000 (11:44 +0000)
diff --git a/crypto/aes/asm/aes-s390x.pl b/crypto/aes/asm/aes-s390x.pl

index 4a9713aea1abfee98cf0f20b5d441a1a26529f83..3fcb9a6c299b70298f25d6032c6c6393d1fbb96b 100644 (file)
--- a/crypto/aes/asm/aes-s390x.pl
+++ b/crypto/aes/asm/aes-s390x.pl
@@ -23,7 +23,7 @@
  # for CBC is not utilized, nor multiple blocks are ever processed.
  # Then software key schedule can be postponed till hardware support
  # detection... Performance improvement over assembler is reportedly
-# ~2.5x, but can reach >15x [naturally on larger chunks] if proper
+# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
  # support is implemented.
  
  $t1="%r0";
diff --git a/crypto/sha/asm/sha1-s390x.pl b/crypto/sha/asm/sha1-s390x.pl

index 9f4436d525e3752581a21f3b435b7c5d1257efc5..5c36436d4542449ad07802d8cc9a5daffccc332a 100644 (file)
--- a/crypto/sha/asm/sha1-s390x.pl
+++ b/crypto/sha/asm/sha1-s390x.pl
@@ -13,7 +13,7 @@
  #
  # Performance is >30% better than gcc 3.3 generated code. But the real
  # twist is that SHA1 hardware support is detected and utilized. In
-# which case performance can reach further >8x for larger chunks.
+# which case performance can reach further >4.5x for larger chunks.
  
  $kimdfunc=1;   # magic function code for kimd instruction
  
@@ -160,6 +160,7 @@ $code.=<<___ if ($kimdfunc);
         lgr     %r2,$inp
         sllg    %r3,$len,6
         .long   0xb93e0002      # kimd %r0,%r2
+       brc     1,.-4           # pay attention to "partial completion"
         br      %r14
  .Lsoftware:
  ___
diff --git a/crypto/sha/asm/sha512-s390x.pl b/crypto/sha/asm/sha512-s390x.pl

index 67a17d3808a5ef4f4491fb09a87cc804cd4268c9..5dd17473fabd231ecb38e8fa5bda7dd4b8e9a479 100644 (file)
--- a/crypto/sha/asm/sha512-s390x.pl
+++ b/crypto/sha/asm/sha512-s390x.pl
@@ -16,7 +16,7 @@
  # "pathologically" high, in particular in comparison to other SHA
  # modules). But the real twist is that it detects if hardware support
  # for SHA256 is available and in such case utilizes it. Then the
-# performance can reach >12x of assembler one for larger chunks.
+# performance can reach >6.5x of assembler one for larger chunks.
  #
  # sha512_block_data_order is ~70% faster than gcc 3.3 generated code.
  
@@ -219,6 +219,7 @@ $code.=<<___ if ($kimdfunc);
         lgr     %r2,$inp
         sllg    %r3,$len,`log(16*$SZ)/log(2)`
         .long   0xb93e0002      # kimd %r0,%r2
+       brc     1,.-4           # pay attention to "partial completion"
         br      %r14
  .Lsoftware:
  ___
author	Andy Polyakov <appro@openssl.org>
	Wed, 2 May 2007 11:44:02 +0000 (11:44 +0000)
committer	Andy Polyakov <appro@openssl.org>
	Wed, 2 May 2007 11:44:02 +0000 (11:44 +0000)
crypto/aes/asm/aes-s390x.pl		patch | blob | history
crypto/sha/asm/sha1-s390x.pl		patch | blob | history
crypto/sha/asm/sha512-s390x.pl		patch | blob | history