From: Andy Polyakov
Date: Sat, 4 Jun 2016 17:24:57 +0000 (+0200)
Subject: aes/asm/aesp8-ppc.pl: implement "tweak chaining".
X-Git-Tag: OpenSSL_1_1_0-pre6~439
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=627c95337611151b4293944e1b706fea7e1ba5dc;p=openssl

aes/asm/aesp8-ppc.pl: implement "tweak chaining".

This is useful in the Linux kernel context, where data can arrive
fragmented and processing may take multiple calls.

Reviewed-by: Rich Salz
---

diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
index 459ef88a29..3fdf1ecda0 100755
--- a/crypto/aes/asm/aesp8-ppc.pl
+++ b/crypto/aes/asm/aesp8-ppc.pl
@@ -1909,6 +1909,15 @@ ___
 #########################################################################
 {{{	# XTS procedures						#
+# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
+#                             const AES_KEY *key1, const AES_KEY *key2,#
+#                             [const] unsigned char iv[16]);		#
+# If $key2 is NULL, a "tweak chaining" mode is engaged, in which the	#
+# input tweak value is assumed to be encrypted already, and the last	#
+# tweak value, suitable for a consecutive call on the same chunk of	#
+# data, is written back to the original buffer. In addition, in	#
+# "tweak chaining" mode only complete input blocks are processed.	#
+
 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
 my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
 my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
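For callers, the prototype above works out to the following pattern when
data is fragmented. This is a minimal illustrative sketch, not code from
the patch: the helper name is hypothetical, and the explicit AES_encrypt()
pre-step reflects the comment's requirement that the input tweak value be
encrypted already.

    #include <stddef.h>
    #include <openssl/aes.h>

    int aes_p8_xts_encrypt(const char *inp, char *out, size_t len,
                           const AES_KEY *key1, const AES_KEY *key2,
                           unsigned char iv[16]);

    /* Hypothetical caller: XTS-encrypt data arriving in two fragments.
     * len1 must be a multiple of 16, since in "tweak chaining" mode
     * only complete input blocks are processed. */
    static void xts_encrypt_fragmented(const char *frag1, size_t len1,
                                       const char *frag2, size_t len2,
                                       char *out, const AES_KEY *key1,
                                       const AES_KEY *key2,
                                       const unsigned char iv[16])
    {
        unsigned char tweak[16];

        /* Encrypt the IV with key2 once, up front: in chaining mode
         * the routine expects an already-encrypted tweak. */
        AES_encrypt(iv, tweak, key2);

        /* key2 == NULL engages tweak chaining: each call consumes the
         * pre-encrypted tweak and writes the next tweak value back. */
        aes_p8_xts_encrypt(frag1, out,        len1, key1, NULL, tweak);
        aes_p8_xts_encrypt(frag2, out + len1, len2, key1, NULL, tweak);
    }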
@@ -1943,18 +1952,21 @@ $code.=<<___;
 	le?vxor		$inpperm,$inpperm,$tmp
 	vperm		$tweak,$tweak,$inptail,$inpperm
 
-	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
-	lwz		$rounds,240($key2)
-	srwi		$rounds,$rounds,1
-	subi		$rounds,$rounds,1
-	li		$idx,16
-
 	neg		r11,$inp
 	lvsr		$inpperm,0,r11			# prepare for unaligned load
 	lvx		$inout,0,$inp
 	addi		$inp,$inp,15			# 15 is not typo
 	le?vxor		$inpperm,$inpperm,$tmp
 
+	${UCMP}i	$key2,0				# key2==NULL?
+	beq		Lxts_enc_no_key2
+
+	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
+	lwz		$rounds,240($key2)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
 	lvx		$rndkey0,0,$key2
 	lvx		$rndkey1,$idx,$key2
 	addi		$idx,$idx,16
@@ -1978,10 +1990,18 @@ Ltweak_xts_enc:
 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
 	vcipher		$tweak,$tweak,$rndkey1
 	lvx		$rndkey1,$idx,$key2
-	li		$idx,16
 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 	vcipherlast	$tweak,$tweak,$rndkey0
 
+	li		$ivp,0				# don't chain the tweak
+	b		Lxts_enc
+
+Lxts_enc_no_key2:
+	li		$idx,-16
+	and		$len,$len,$idx			# in "tweak chaining"
+							# mode only complete
+							# blocks are processed
+Lxts_enc:
 	lvx		$inptail,0,$inp
 	addi		$inp,$inp,16
@@ -2097,6 +2117,19 @@ Loop_xts_enc_steal:
 	b		Loop_xts_enc			# one more time...
 
 Lxts_enc_done:
+	${UCMP}i	$ivp,0
+	beq		Lxts_enc_ret
+
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak,$tweak,$tmp
+
+	le?vperm	$tweak,$tweak,$tweak,$leperm
+	stvx_u		$tweak,0,$ivp
+
+Lxts_enc_ret:
 	mtspr		256,r12				# restore vrsave
 	li		r3,0
 	blr
@@ -2135,18 +2168,21 @@ Lxts_enc_done:
 	le?vxor		$inpperm,$inpperm,$tmp
 	vperm		$tweak,$tweak,$inptail,$inpperm
 
-	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
-	lwz		$rounds,240($key2)
-	srwi		$rounds,$rounds,1
-	subi		$rounds,$rounds,1
-	li		$idx,16
-
 	neg		r11,$inp
 	lvsr		$inpperm,0,r11			# prepare for unaligned load
 	lvx		$inout,0,$inp
 	addi		$inp,$inp,15			# 15 is not typo
 	le?vxor		$inpperm,$inpperm,$tmp
 
+	${UCMP}i	$key2,0				# key2==NULL?
+	beq		Lxts_dec_no_key2
+
+	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
+	lwz		$rounds,240($key2)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
 	lvx		$rndkey0,0,$key2
 	lvx		$rndkey1,$idx,$key2
 	addi		$idx,$idx,16
@@ -2170,10 +2206,19 @@ Ltweak_xts_dec:
 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
 	vcipher		$tweak,$tweak,$rndkey1
 	lvx		$rndkey1,$idx,$key2
-	li		$idx,16
 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 	vcipherlast	$tweak,$tweak,$rndkey0
 
+	li		$ivp,0				# don't chain the tweak
+	b		Lxts_dec
+
+Lxts_dec_no_key2:
+	neg		$idx,$len
+	andi.		$idx,$idx,15
+	add		$len,$len,$idx			# in "tweak chaining"
+							# mode only complete
+							# blocks are processed
+Lxts_dec:
 	lvx		$inptail,0,$inp
 	addi		$inp,$inp,16
@@ -2328,6 +2373,19 @@ Loop_xts_dec_steal:
 	b		Loop_xts_dec			# one more time...
 
 Lxts_dec_done:
+	${UCMP}i	$ivp,0
+	beq		Lxts_dec_ret
+
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak,$tweak,$tmp
+
+	le?vperm	$tweak,$tweak,$tweak,$leperm
+	stvx_u		$tweak,0,$ivp
+
+Lxts_dec_ret:
 	mtspr		256,r12				# restore vrsave
 	li		r3,0
 	blr
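The vsrab/vaddubm/vsldoi/vand/vxor sequence added at Lxts_enc_done and
Lxts_dec_done computes the next tweak value, i.e. multiplication of the
current tweak by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1. A
scalar C rendering of the same update, for clarity (a sketch, not code
from the patch):

    /* Multiply the 128-bit XTS tweak by x in GF(2^128), reducing by
     * x^128 + x^7 + x^2 + x + 1; t[0] is the least significant byte,
     * as in IEEE P1619. */
    static void xts_double_tweak(unsigned char t[16])
    {
        unsigned int carry = 0;

        for (int i = 0; i < 16; i++) {
            unsigned int b = t[i];
            t[i] = (unsigned char)((b << 1) | carry);  /* shift in carry */
            carry = b >> 7;                    /* carry out of this byte */
        }
        if (carry)
            t[0] ^= 0x87;                      /* polynomial reduction */
    }

The vector code reaches the same result branchlessly: vaddubm doubles
every byte, vsrab and vsldoi build a mask of the per-byte carries moved
into the neighbouring byte, and vand with the 0x87/0x01 constant lets the
final vxor apply both the inter-byte carries and the reduction at once.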
@@ -2338,8 +2396,8 @@ Lxts_dec_done:
 ___
 #########################################################################
 {{	# Optimized XTS procedures					#
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+my $key_=$key2;
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
 $x00=0 if ($flavour =~ /osx/);
 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
@@ -2353,33 +2411,32 @@ $code.=<<___;
 .align	5
 _aesp8_xts_encrypt6x:
 	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
-	mflr		r0
+	mflr		r11
 	li		r7,`$FRAME+8*16+15`
-	li		r8,`$FRAME+8*16+31`
-	$PUSH		r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+	li		r3,`$FRAME+8*16+31`
+	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
 	stvx		v20,r7,$sp		# ABI says so
 	addi		r7,r7,32
-	stvx		v21,r8,$sp
-	addi		r8,r8,32
+	stvx		v21,r3,$sp
+	addi		r3,r3,32
 	stvx		v22,r7,$sp
 	addi		r7,r7,32
-	stvx		v23,r8,$sp
-	addi		r8,r8,32
+	stvx		v23,r3,$sp
+	addi		r3,r3,32
 	stvx		v24,r7,$sp
 	addi		r7,r7,32
-	stvx		v25,r8,$sp
-	addi		r8,r8,32
+	stvx		v25,r3,$sp
+	addi		r3,r3,32
 	stvx		v26,r7,$sp
 	addi		r7,r7,32
-	stvx		v27,r8,$sp
-	addi		r8,r8,32
+	stvx		v27,r3,$sp
+	addi		r3,r3,32
 	stvx		v28,r7,$sp
 	addi		r7,r7,32
-	stvx		v29,r8,$sp
-	addi		r8,r8,32
+	stvx		v29,r3,$sp
+	addi		r3,r3,32
 	stvx		v30,r7,$sp
-	stvx		v31,r8,$sp
-	mr		r7,r0
+	stvx		v31,r3,$sp
 	li		r0,-1
 	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
 	li		$x10,0x10
@@ -2842,12 +2899,12 @@ Lxts_enc6x_steal:
 	vperm		$out0,$out0,$out1,$inpperm
 	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
 
-	subi		r3,$out,17
+	subi		r30,$out,17
 	subi		$out,$out,16
 	mtctr		$taillen
Loop_xts_enc6x_steal:
-	lbzu		r0,1(r3)
-	stb		r0,16(r3)
+	lbzu		r0,1(r30)
+	stb		r0,16(r30)
 	bdnz		Loop_xts_enc6x_steal
 
 	li		$taillen,0
@@ -2856,7 +2913,15 @@ Loop_xts_enc6x_steal:
 
 .align	4
 Lxts_enc6x_done:
-	mtlr		r7
+	${UCMP}i	$ivp,0
+	beq		Lxts_enc6x_ret
+
+	vxor		$tweak,$twk0,$rndkey0
+	le?vperm	$tweak,$tweak,$tweak,$leperm
+	stvx_u		$tweak,0,$ivp
+
+Lxts_enc6x_ret:
+	mtlr		r11
 	li		r10,`$FRAME+15`
 	li		r11,`$FRAME+31`
 	stvx		$seven,r10,$sp		# wipe copies of round keys
@@ -2998,33 +3063,32 @@ _aesp8_xts_enc5x:
 .align	5
 _aesp8_xts_decrypt6x:
 	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
-	mflr		r0
+	mflr		r11
 	li		r7,`$FRAME+8*16+15`
-	li		r8,`$FRAME+8*16+31`
-	$PUSH		r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+	li		r3,`$FRAME+8*16+31`
+	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
 	stvx		v20,r7,$sp		# ABI says so
 	addi		r7,r7,32
-	stvx		v21,r8,$sp
-	addi		r8,r8,32
+	stvx		v21,r3,$sp
+	addi		r3,r3,32
 	stvx		v22,r7,$sp
 	addi		r7,r7,32
-	stvx		v23,r8,$sp
-	addi		r8,r8,32
+	stvx		v23,r3,$sp
+	addi		r3,r3,32
 	stvx		v24,r7,$sp
 	addi		r7,r7,32
-	stvx		v25,r8,$sp
-	addi		r8,r8,32
+	stvx		v25,r3,$sp
+	addi		r3,r3,32
 	stvx		v26,r7,$sp
 	addi		r7,r7,32
-	stvx		v27,r8,$sp
-	addi		r8,r8,32
+	stvx		v27,r3,$sp
+	addi		r3,r3,32
 	stvx		v28,r7,$sp
 	addi		r7,r7,32
-	stvx		v29,r8,$sp
-	addi		r8,r8,32
+	stvx		v29,r3,$sp
+	addi		r3,r3,32
 	stvx		v30,r7,$sp
-	stvx		v31,r8,$sp
-	mr		r7,r0
+	stvx		v31,r3,$sp
 	li		r0,-1
 	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
 	li		$x10,0x10
@@ -3524,11 +3588,11 @@ Lxts_dec6x_steal:
 	vsel		$out0,$in0,$tmp,$out0
 	vxor		$out0,$out0,$twk0
 
-	subi		r3,$out,1
+	subi		r30,$out,1
 	mtctr		$taillen
Loop_xts_dec6x_steal:
-	lbzu		r0,1(r3)
-	stb		r0,16(r3)
+	lbzu		r0,1(r30)
+	stb		r0,16(r30)
 	bdnz		Loop_xts_dec6x_steal
 
 	li		$taillen,0
@@ -3537,7 +3601,15 @@ Loop_xts_dec6x_steal:
 
 .align	4
 Lxts_dec6x_done:
-	mtlr		r7
+	${UCMP}i	$ivp,0
+	beq		Lxts_dec6x_ret
+
+	vxor		$tweak,$twk0,$rndkey0
+	le?vperm	$tweak,$tweak,$tweak,$leperm
+	stvx_u		$tweak,0,$ivp
+
+Lxts_dec6x_ret:
+	mtlr		r11
 	li		r10,`$FRAME+15`
 	li		r11,`$FRAME+31`
 	stvx		$seven,r10,$sp		# wipe copies of round keys
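The register renames in the 6x hunks above follow from the new register
assignment: r3 now holds the $x10 offset constant, and r8 ($ivp) has to
survive to Lxts_enc6x_done/Lxts_dec6x_done so the chained tweak can be
stored back through it, hence the steal loops switch their byte pointer
from r3 to r30. The loops themselves are unchanged; in C terms the
encrypt-side copy is roughly the following (a sketch, assuming out points
just past the last complete ciphertext block and taillen is the length of
the partial final block; the decrypt loop starts at $out-1 instead, so
its copy runs from the just-decrypted block forward into the tail):

    /* Tail copy performed by Loop_xts_enc6x_steal: replicate the first
     * taillen bytes of the last complete ciphertext block into the
     * partial tail; the vsel-assembled final block is then stored 16
     * bytes back. */
    static void xts_steal_tail(unsigned char *out, size_t taillen)
    {
        const unsigned char *last = out - 16;   /* last complete block */
        for (size_t i = 0; i < taillen; i++)
            out[i] = last[i];
    }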