Clarify use of |$end0| in stitched x86-64 AES-GCM code.
There was some uncertainty about what the code is doing with |$end0| and whether it was necessary for |$len| to be a multiple of 16 or 96. Hopefully these added comments make it clear that the code is correct except for the caveat regarding low memory addresses.

Change-Id: Iea546a59dc7aeb400f50ac5d2d7b9cb88ace9027
Reviewed-on: https://boringssl-review.googlesource.com/7194
Reviewed-by: Adam Langley <agl@google.com>
Signed-off-by: Andy Polyakov <appro@openssl.org>
Reviewed-by: Rich Salz <rsalz@openssl.org>
This commit is contained in:
parent
0b919cc5d5
commit
cd359b2564
1 changed file with 41 additions and 0 deletions
|
@ -116,6 +116,23 @@ _aesni_ctr32_ghash_6x:
|
||||||
vpxor $rndkey,$inout3,$inout3
|
vpxor $rndkey,$inout3,$inout3
|
||||||
vmovups 0x10-0x80($key),$T2 # borrow $T2 for $rndkey
|
vmovups 0x10-0x80($key),$T2 # borrow $T2 for $rndkey
|
||||||
vpclmulqdq \$0x01,$Hkey,$Z3,$Z2
|
vpclmulqdq \$0x01,$Hkey,$Z3,$Z2
|
||||||
|
|
||||||
|
# At this point, the current block of 96 (0x60) bytes has already been
|
||||||
|
# loaded into registers. Concurrently with processing it, we want to
|
||||||
|
# load the next 96 bytes of input for the next round. Obviously, we can
|
||||||
|
# only do this if there are at least 96 more bytes of input beyond the
|
||||||
|
# input we're currently processing, or else we'd read past the end of
|
||||||
|
# the input buffer. Here, we set |%r12| to 96 if there are at least 96
|
||||||
|
# bytes of input beyond the 96 bytes we're already processing, and we
|
||||||
|
# set |%r12| to 0 otherwise. In the case where we set |%r12| to 96,
|
||||||
|
# we'll read in the next block so that it is in registers for the next
|
||||||
|
# loop iteration. In the case where we set |%r12| to 0, we'll re-read
|
||||||
|
# the current block and then ignore what we re-read.
|
||||||
|
#
|
||||||
|
# At this point, |$in0| points to the current (already read into
|
||||||
|
# registers) block, and |$end0| points to 2*96 bytes before the end of
|
||||||
|
# the input. Thus, |$in0| > |$end0| means that we do not have the next
|
||||||
|
# 96-byte block to read in, and |$in0| <= |$end0| means we do.
|
||||||
xor %r12,%r12
|
xor %r12,%r12
|
||||||
cmp $in0,$end0
|
cmp $in0,$end0
|
||||||
|
|
||||||
|
@ -408,6 +425,9 @@ $code.=<<___;
|
||||||
.align 32
|
.align 32
|
||||||
aesni_gcm_decrypt:
|
aesni_gcm_decrypt:
|
||||||
xor $ret,$ret
|
xor $ret,$ret
|
||||||
|
|
||||||
|
# We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60)
|
||||||
|
# bytes of input.
|
||||||
cmp \$0x60,$len # minimal accepted length
|
cmp \$0x60,$len # minimal accepted length
|
||||||
jb .Lgcm_dec_abort
|
jb .Lgcm_dec_abort
|
||||||
|
|
||||||
|
@ -462,7 +482,15 @@ $code.=<<___;
|
||||||
vmovdqu 0x50($inp),$Z3 # I[5]
|
vmovdqu 0x50($inp),$Z3 # I[5]
|
||||||
lea ($inp),$in0
|
lea ($inp),$in0
|
||||||
vmovdqu 0x40($inp),$Z0
|
vmovdqu 0x40($inp),$Z0
|
||||||
|
|
||||||
|
# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
|
||||||
|
# bytes before the end of the input. Note, in particular, that this is
|
||||||
|
# correct even if |$len| is not an exact multiple of 96 or 16. XXX: This
|
||||||
|
# seems to require that |$inp| + |$len| >= 2*96 (0xc0); i.e. |$inp| must
|
||||||
|
# not be near the very beginning of the address space when |$len| < 2*96
|
||||||
|
# (0xc0).
|
||||||
lea -0xc0($inp,$len),$end0
|
lea -0xc0($inp,$len),$end0
|
||||||
|
|
||||||
vmovdqu 0x30($inp),$Z1
|
vmovdqu 0x30($inp),$Z1
|
||||||
shr \$4,$len
|
shr \$4,$len
|
||||||
xor $ret,$ret
|
xor $ret,$ret
|
||||||
|
@ -618,6 +646,10 @@ _aesni_ctr32_6x:
|
||||||
.align 32
|
.align 32
|
||||||
aesni_gcm_encrypt:
|
aesni_gcm_encrypt:
|
||||||
xor $ret,$ret
|
xor $ret,$ret
|
||||||
|
|
||||||
|
# We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of
|
||||||
|
# input. Then we call |_aesni_ctr32_ghash_6x|, which requires at
|
||||||
|
# least 96 more bytes of input.
|
||||||
cmp \$0x60*3,$len # minimal accepted length
|
cmp \$0x60*3,$len # minimal accepted length
|
||||||
jb .Lgcm_enc_abort
|
jb .Lgcm_enc_abort
|
||||||
|
|
||||||
|
@ -667,7 +699,16 @@ $code.=<<___;
|
||||||
.Lenc_no_key_aliasing:
|
.Lenc_no_key_aliasing:
|
||||||
|
|
||||||
lea ($out),$in0
|
lea ($out),$in0
|
||||||
|
|
||||||
|
# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
|
||||||
|
# bytes before the end of the input. Note, in particular, that this is
|
||||||
|
# correct even if |$len| is not an exact multiple of 96 or 16. Unlike in
|
||||||
|
# the decryption case, there's no caveat that |$out| must not be near
|
||||||
|
# the very beginning of the address space, because we know that
|
||||||
|
# |$len| >= 3*96 from the check above, and so we know
|
||||||
|
# |$out| + |$len| >= 2*96 (0xc0).
|
||||||
lea -0xc0($out,$len),$end0
|
lea -0xc0($out,$len),$end0
|
||||||
|
|
||||||
shr \$4,$len
|
shr \$4,$len
|
||||||
|
|
||||||
call _aesni_ctr32_6x
|
call _aesni_ctr32_6x
|
||||||
|
|
Loading…
Reference in a new issue