From ae007d4d09f2ed9940c6e581bab9290d78615506 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 15 Jul 2012 13:29:23 +0000 Subject: [PATCH] wp-mmx.pl: ~10% performance improvement. --- crypto/whrlpool/asm/wp-mmx.pl | 42 ++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/crypto/whrlpool/asm/wp-mmx.pl b/crypto/whrlpool/asm/wp-mmx.pl index cb2381c22b..c584e5b92b 100644 --- a/crypto/whrlpool/asm/wp-mmx.pl +++ b/crypto/whrlpool/asm/wp-mmx.pl @@ -118,34 +118,36 @@ $tbl="ebp"; &movq (@mm[0],&QWP(2048*$SCALE,$tbl,"esi",8)); # rc[r] &mov ("eax",&DWP(0,"esp")); &mov ("ebx",&DWP(4,"esp")); + &movz ("ecx",&LB("eax")); + &movz ("edx",&HB("eax")); for($i=0;$i<8;$i++) { my $func = ($i==0)? \&movq : \&pxor; - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); + &shr ("eax",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); - &shr ("eax",16); + &movz ("edx",&HB("eax")); &pxor (@mm[0],&QWP(&row(0),$tbl,"esi",8)); &$func (@mm[1],&QWP(&row(1),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); &mov ("eax",&DWP(($i+1)*8,"esp")); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); + &movz ("edx",&HB("ebx")); &$func (@mm[2],&QWP(&row(2),$tbl,"esi",8)); &$func (@mm[3],&QWP(&row(3),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); + &shr ("ebx",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); - &shr ("ebx",16); + &movz ("edx",&HB("ebx")); &$func (@mm[4],&QWP(&row(4),$tbl,"esi",8)); &$func (@mm[5],&QWP(&row(5),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); &mov ("ebx",&DWP(($i+1)*8+4,"esp")); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); + &movz ("edx",&HB("eax")); &$func (@mm[6],&QWP(&row(6),$tbl,"esi",8)); &$func (@mm[7],&QWP(&row(7),$tbl,"edi",8)); push(@mm,shift(@mm)); @@ -154,32 +156,32 @@ for($i=0;$i<8;$i++) { for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); } # K=L for($i=0;$i<8;$i++) { - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); + &shr ("eax",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); - &shr ("eax",16); + &movz ("edx",&HB("eax")); &pxor (@mm[0],&QWP(&row(0),$tbl,"esi",8)); &pxor (@mm[1],&QWP(&row(1),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); &mov ("eax",&DWP(64+($i+1)*8,"esp")) if ($i<7); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); + &movz ("edx",&HB("ebx")); &pxor (@mm[2],&QWP(&row(2),$tbl,"esi",8)); &pxor (@mm[3],&QWP(&row(3),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); + &shr ("ebx",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); - &shr ("ebx",16); + &movz ("edx",&HB("ebx")); &pxor (@mm[4],&QWP(&row(4),$tbl,"esi",8)); &pxor (@mm[5],&QWP(&row(5),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); &mov ("ebx",&DWP(64+($i+1)*8+4,"esp")) if ($i<7); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); + &movz ("edx",&HB("eax")); &pxor (@mm[6],&QWP(&row(6),$tbl,"esi",8)); &pxor (@mm[7],&QWP(&row(7),$tbl,"edi",8)); push(@mm,shift(@mm)); -- 2.40.0