From 47edeb9f597c9c5ab127398330959bf0343f050e Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 15 Feb 2013 11:23:06 +0100 Subject: [PATCH] sha256-586.pl: code refresh. --- crypto/sha/asm/sha256-586.pl | 39 +++++++++++++++++------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl index 7244b00c87..447c50b0cc 100644 --- a/crypto/sha/asm/sha256-586.pl +++ b/crypto/sha/asm/sha256-586.pl @@ -33,10 +33,11 @@ # PIII P4 AMD K8 Core2 SB Atom Bldzr # gcc 36 41 27 26 25 50 36 # icc 33 38 25 23 - - - -# x86 asm(*) 27/24 28 19/15.5 18/15.6 12.5 30/25 16.6 +# x86 asm(*) 27/24 28 19/15.5 18/15.6 12.3 30/25 16.6 # x86_64 asm(**) 17.5 15.1 13.9 11.6 22 13.7 # -# (*) numbers after slash are for unrolled loop, where available; +# (*) numbers after slash are for unrolled loop, where available, +# otherwise best applicable such as AVX/XOP; # (**) x86_64 assembly performance is presented for reference # purposes. @@ -775,32 +776,28 @@ sub Xupdate_AVX () { '&vpsrld ($t3,$t0,3);', '&vpslld ($t1,$t0,14);', '&vpxor ($t0,$t3,$t2);', + '&vpshufd ($t3,@X[3],0b11111010)',# X[14..15] '&vpsrld ($t2,$t2,18-7);', '&vpxor ($t0,$t0,$t1);', '&vpslld ($t1,$t1,25-14);', '&vpxor ($t0,$t0,$t2);', - '&vpsrld ($t3,@X[3],10);', + '&vpsrld ($t2,$t3,10);', '&vpxor ($t0,$t0,$t1);', # sigma0(X[1..4]) - '&vpslld ($t2,@X[3],13);', + '&vpsrlq ($t1,$t3,17);', '&vpaddd (@X[0],@X[0],$t0);', # X[0..3] += sigma0(X[1..4]) - '&vpsrld ($t1,@X[3],17);', - '&vpxor ($t3,$t3,$t2);', - '&vpslld ($t2,$t2,15-13);', - '&vpxor ($t3,$t3,$t1);', - '&vpsrld ($t1,$t1,19-17);', - '&vpxor ($t3,$t3,$t2);', - '&vpxor ($t3,$t3,$t1);', # sigma1(X[14..15]) + '&vpxor ($t2,$t2,$t1);', + '&vpsrlq ($t3,$t3,19);', + '&vpxor ($t2,$t2,$t3);', # sigma1(X[14..15]) + '&vpshufd ($t3,$t2,0b10000100);', '&vpsrldq ($t3,$t3,8);', '&vpaddd (@X[0],@X[0],$t3);', # X[0..1] += sigma1(X[14..15]) - '&vpsrld ($t3,@X[0],10);', - '&vpslld ($t2,@X[0],13);', - '&vpsrld 
($t1,@X[0],17);', - '&vpxor ($t3,$t3,$t2);', - '&vpslld ($t2,$t2,15-13);', - '&vpxor ($t3,$t3,$t1);', - '&vpsrld ($t1,$t1,19-17);', - '&vpxor ($t3,$t3,$t2);', - '&vpxor ($t3,$t3,$t1);', # sigma1(X[16..17]) + '&vpshufd ($t3,@X[0],0b01010000)',# X[16..17] + '&vpsrld ($t2,$t3,10);', + '&vpsrlq ($t1,$t3,17);', + '&vpxor ($t2,$t2,$t1);', + '&vpsrlq ($t3,$t3,19);', + '&vpxor ($t2,$t2,$t3);', # sigma1(X[16..17]) + '&vpshufd ($t3,$t2,0b11101000);', '&vpslldq ($t3,$t3,8);', '&vpaddd (@X[0],@X[0],$t3);' # X[2..3] += sigma1(X[16..17]) ); @@ -813,7 +810,7 @@ my $body = shift; my @X = @_; my @insns = (&$body,&$body,&$body,&$body); # 120 instructions - foreach (Xupdate_AVX()) { # 31 instructions + foreach (Xupdate_AVX()) { # 31 instructions eval; eval(shift(@insns)); eval(shift(@insns)); -- 2.40.0