From: Rich Salz Date: Wed, 14 Jun 2017 19:07:52 +0000 (-0400) Subject: Undo commit cd359b2 X-Git-Tag: OpenSSL_1_1_1-pre1~1106 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=28f298e70aa8c65b275e6c915b5717a59090932d;p=openssl Undo commit cd359b2 Original text: Clarify use of |$end0| in stitched x86-64 AES-GCM code. There was some uncertainty about what the code is doing with |$end0| and whether it was necessary for |$len| to be a multiple of 16 or 96. Hopefully these added comments make it clear that the code is correct except for the caveat regarding low memory addresses. Change-Id: Iea546a59dc7aeb400f50ac5d2d7b9cb88ace9027 Reviewed-on: https://boringssl-review.googlesource.com/7194 Reviewed-by: Adam Langley Reviewed-by: Richard Levitte Reviewed-by: Tim Hudson (Merged from https://github.com/openssl/openssl/pull/3700) --- diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl index 5e69cb86fa..3cd231e22f 100644 --- a/crypto/modes/asm/aesni-gcm-x86_64.pl +++ b/crypto/modes/asm/aesni-gcm-x86_64.pl @@ -116,23 +116,6 @@ _aesni_ctr32_ghash_6x: vpxor $rndkey,$inout3,$inout3 vmovups 0x10-0x80($key),$T2 # borrow $T2 for $rndkey vpclmulqdq \$0x01,$Hkey,$Z3,$Z2 - - # At this point, the current block of 96 (0x60) bytes has already been - # loaded into registers. Concurrently with processing it, we want to - # load the next 96 bytes of input for the next round. Obviously, we can - # only do this if there are at least 96 more bytes of input beyond the - # input we're currently processing, or else we'd read past the end of - # the input buffer. Here, we set |%r12| to 96 if there are at least 96 - # bytes of input beyond the 96 bytes we're already processing, and we - # set |%r12| to 0 otherwise. In the case where we set |%r12| to 96, - # we'll read in the next block so that it is in registers for the next - # loop iteration. In the case where we set |%r12| to 0, we'll re-read - # the current block and then ignore what we re-read. - # - # At this point, |$in0| points to the current (already read into - # registers) block, and |$end0| points to 2*96 bytes before the end of - # the input. Thus, |$in0| > |$end0| means that we do not have the next - # 96-byte block to read in, and |$in0| <= |$end0| means we do. xor %r12,%r12 cmp $in0,$end0 @@ -426,9 +409,6 @@ $code.=<<___; aesni_gcm_decrypt: .cfi_startproc xor $ret,$ret - - # We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60) - # bytes of input. cmp \$0x60,$len # minimal accepted length jb .Lgcm_dec_abort @@ -490,15 +470,7 @@ $code.=<<___; vmovdqu 0x50($inp),$Z3 # I[5] lea ($inp),$in0 vmovdqu 0x40($inp),$Z0 - - # |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0) - # bytes before the end of the input. Note, in particular, that this is - # correct even if |$len| is not an even multiple of 96 or 16. XXX: This - # seems to require that |$inp| + |$len| >= 2*96 (0xc0); i.e. |$inp| must - # not be near the very beginning of the address space when |$len| < 2*96 - # (0xc0). lea -0xc0($inp,$len),$end0 - vmovdqu 0x30($inp),$Z1 shr \$4,$len xor $ret,$ret @@ -663,10 +635,6 @@ _aesni_ctr32_6x: aesni_gcm_encrypt: .cfi_startproc xor $ret,$ret - - # We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of - # input. Then we call |_aesni_ctr32_ghash_6x|, which requires at - # least 96 more bytes of input. cmp \$0x60*3,$len # minimal accepted length jb .Lgcm_enc_abort @@ -723,16 +691,7 @@ $code.=<<___; .Lenc_no_key_aliasing: lea ($out),$in0 - - # |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0) - # bytes before the end of the input. Note, in particular, that this is - # correct even if |$len| is not an even multiple of 96 or 16. Unlike in - # the decryption case, there's no caveat that |$out| must not be near - # the very beginning of the address space, because we know that - # |$len| >= 3*96 from the check above, and so we know - # |$out| + |$len| >= 2*96 (0xc0). lea -0xc0($out,$len),$end0 - shr \$4,$len call _aesni_ctr32_6x