From cf3aeae419217f832ac2f351d6763d0503c8e1b7 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sat, 28 May 2011 09:41:36 +0000 Subject: [PATCH] aes-ppc.pl: handle unaligned data on page boundaries. --- crypto/aes/asm/aes-ppc.pl | 158 +++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl index 8cfd4232b8..3e65b74851 100644 --- a/crypto/aes/asm/aes-ppc.pl +++ b/crypto/aes/asm/aes-ppc.pl @@ -7,7 +7,7 @@ # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== -# Needs more work: key setup, page boundaries, CBC routine... +# Needs more work: key setup, CBC routine... # # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with # 128-bit key, which is ~40% better than 64-bit code generated by gcc @@ -359,6 +359,12 @@ $code.=<<___; $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) + andi. $t0,$inp,3 + andi. $t1,$out,3 + or. $t0,$t0,$t1 + bne Lenc_unaligned + +Lenc_unaligned_ok: lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) @@ -369,7 +375,79 @@ $code.=<<___; stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) + b Lenc_done + +Lenc_unaligned: + subfic $t0,$inp,4096 + subfic $t1,$out,4096 + andi. $t0,$t0,4096-16 + beq Lenc_xpage + andi. $t1,$t1,4096-16 + bne Lenc_unaligned_ok + +Lenc_xpage: + lbz $acc00,0($inp) + lbz $acc01,1($inp) + lbz $acc02,2($inp) + lbz $s0,3($inp) + lbz $acc04,4($inp) + lbz $acc05,5($inp) + lbz $acc06,6($inp) + lbz $s1,7($inp) + lbz $acc08,8($inp) + lbz $acc09,9($inp) + lbz $acc10,10($inp) + lbz $s2,11($inp) + lbz $acc12,12($inp) + lbz $acc13,13($inp) + lbz $acc14,14($inp) + lbz $s3,15($inp) + insrwi $s0,$acc00,8,0 + insrwi $s1,$acc04,8,0 + insrwi $s0,$acc01,8,8 + insrwi $s1,$acc05,8,8 + insrwi $s0,$acc02,8,16 + insrwi $s1,$acc06,8,16 + insrwi $s2,$acc08,8,0 + insrwi $s3,$acc12,8,0 + insrwi $s2,$acc09,8,8 + insrwi $s3,$acc13,8,8 + insrwi $s2,$acc10,8,16 + insrwi $s3,$acc14,8,16 + + bl LAES_Te + bl Lppc_AES_encrypt_compact + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 + stb $acc00,0($out) + extrwi $acc02,$s0,8,16 + stb $acc01,1($out) + stb $acc02,2($out) + extrwi $acc04,$s1,8,0 + stb $s0,3($out) + extrwi $acc05,$s1,8,8 + stb $acc04,4($out) + extrwi $acc06,$s1,8,16 + stb $acc05,5($out) + stb $acc06,6($out) + extrwi $acc08,$s2,8,0 + stb $s1,7($out) + extrwi $acc09,$s2,8,8 + stb $acc08,8($out) + extrwi $acc10,$s2,8,16 + stb $acc09,9($out) + stb $acc10,10($out) + extrwi $acc12,$s3,8,0 + stb $s2,11($out) + extrwi $acc13,$s3,8,8 + stb $acc12,12($out) + extrwi $acc14,$s3,8,16 + stb $acc13,13($out) + stb $acc14,14($out) + stb $s3,15($out) +Lenc_done: $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) @@ -715,6 +793,12 @@ Lenc_compact_done: $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) + andi. $t0,$inp,3 + andi. $t1,$out,3 + or. $t0,$t0,$t1 + bne Ldec_unaligned + +Ldec_unaligned_ok: lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) @@ -725,7 +809,79 @@ Lenc_compact_done: stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) + b Ldec_done + +Ldec_unaligned: + subfic $t0,$inp,4096 + subfic $t1,$out,4096 + andi. $t0,$t0,4096-16 + beq Ldec_xpage + andi. $t1,$t1,4096-16 + bne Ldec_unaligned_ok + +Ldec_xpage: + lbz $acc00,0($inp) + lbz $acc01,1($inp) + lbz $acc02,2($inp) + lbz $s0,3($inp) + lbz $acc04,4($inp) + lbz $acc05,5($inp) + lbz $acc06,6($inp) + lbz $s1,7($inp) + lbz $acc08,8($inp) + lbz $acc09,9($inp) + lbz $acc10,10($inp) + lbz $s2,11($inp) + lbz $acc12,12($inp) + lbz $acc13,13($inp) + lbz $acc14,14($inp) + lbz $s3,15($inp) + insrwi $s0,$acc00,8,0 + insrwi $s1,$acc04,8,0 + insrwi $s0,$acc01,8,8 + insrwi $s1,$acc05,8,8 + insrwi $s0,$acc02,8,16 + insrwi $s1,$acc06,8,16 + insrwi $s2,$acc08,8,0 + insrwi $s3,$acc12,8,0 + insrwi $s2,$acc09,8,8 + insrwi $s3,$acc13,8,8 + insrwi $s2,$acc10,8,16 + insrwi $s3,$acc14,8,16 + + bl LAES_Td + bl Lppc_AES_decrypt_compact + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 + stb $acc00,0($out) + extrwi $acc02,$s0,8,16 + stb $acc01,1($out) + stb $acc02,2($out) + extrwi $acc04,$s1,8,0 + stb $s0,3($out) + extrwi $acc05,$s1,8,8 + stb $acc04,4($out) + extrwi $acc06,$s1,8,16 + stb $acc05,5($out) + stb $acc06,6($out) + extrwi $acc08,$s2,8,0 + stb $s1,7($out) + extrwi $acc09,$s2,8,8 + stb $acc08,8($out) + extrwi $acc10,$s2,8,16 + stb $acc09,9($out) + stb $acc10,10($out) + extrwi $acc12,$s3,8,0 + stb $s2,11($out) + extrwi $acc13,$s3,8,8 + stb $acc12,12($out) + extrwi $acc14,$s3,8,16 + stb $acc13,13($out) + stb $acc14,14($out) + stb $s3,15($out) +Ldec_done: $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) -- 2.40.0