Fix some CFI issues in x86_64 assembly
The add/double shortcut in ecp_nistz256-x86_64.pl left one instruction point that did not unwind, and the "slow" path in AES_cbc_encrypt was not annotated correctly. For the latter, add .cfi_{remember,restore}_state support to perlasm. Next, fill in a bunch of functions that are missing no-op .cfi_startproc and .cfi_endproc blocks. libunwind cannot unwind those stack frames otherwise. Finally, work around a bug in libunwind by not encoding rflags. (rflags isn't a callee-saved register, so there's not much need to annotate it anyway.) These were found as part of ABI testing work in BoringSSL. Reviewed-by: Richard Levitte <levitte@openssl.org> GH: #8109
This commit is contained in:
parent
8f58ede095
commit
c0e8e5007b
9 changed files with 127 additions and 2 deletions
|
@ -554,6 +554,7 @@ $code.=<<___;
|
|||
.type _x86_64_AES_encrypt_compact,\@abi-omnipotent
|
||||
.align 16
|
||||
_x86_64_AES_encrypt_compact:
|
||||
.cfi_startproc
|
||||
lea 128($sbox),$inp # size optimization
|
||||
mov 0-128($inp),$acc1 # prefetch Te4
|
||||
mov 32-128($inp),$acc2
|
||||
|
@ -587,6 +588,7 @@ $code.=<<___;
|
|||
xor 8($key),$s2
|
||||
xor 12($key),$s3
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
|
||||
___
|
||||
|
||||
|
@ -1161,6 +1163,7 @@ $code.=<<___;
|
|||
.type _x86_64_AES_decrypt_compact,\@abi-omnipotent
|
||||
.align 16
|
||||
_x86_64_AES_decrypt_compact:
|
||||
.cfi_startproc
|
||||
lea 128($sbox),$inp # size optimization
|
||||
mov 0-128($inp),$acc1 # prefetch Td4
|
||||
mov 32-128($inp),$acc2
|
||||
|
@ -1203,6 +1206,7 @@ $code.=<<___;
|
|||
xor 8($key),$s2
|
||||
xor 12($key),$s3
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
|
||||
___
|
||||
|
||||
|
@ -1365,6 +1369,7 @@ AES_set_encrypt_key:
|
|||
.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent
|
||||
.align 16
|
||||
_x86_64_AES_set_encrypt_key:
|
||||
.cfi_startproc
|
||||
mov %esi,%ecx # %ecx=bits
|
||||
mov %rdi,%rsi # %rsi=userKey
|
||||
mov %rdx,%rdi # %rdi=key
|
||||
|
@ -1546,6 +1551,7 @@ $code.=<<___;
|
|||
mov \$-1,%rax
|
||||
.Lexit:
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
|
||||
___
|
||||
|
||||
|
@ -1728,7 +1734,9 @@ AES_cbc_encrypt:
|
|||
cmp \$0,%rdx # check length
|
||||
je .Lcbc_epilogue
|
||||
pushfq
|
||||
.cfi_push 49 # %rflags
|
||||
# This could be .cfi_push 49, but libunwind fails on registers it does not
|
||||
# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
|
||||
.cfi_adjust_cfa_offset 8
|
||||
push %rbx
|
||||
.cfi_push %rbx
|
||||
push %rbp
|
||||
|
@ -1751,6 +1759,7 @@ AES_cbc_encrypt:
|
|||
cmp \$0,%r9
|
||||
cmoveq %r10,$sbox
|
||||
|
||||
.cfi_remember_state
|
||||
mov OPENSSL_ia32cap_P(%rip),%r10d
|
||||
cmp \$$speed_limit,%rdx
|
||||
jb .Lcbc_slow_prologue
|
||||
|
@ -1986,6 +1995,7 @@ AES_cbc_encrypt:
|
|||
#--------------------------- SLOW ROUTINE ---------------------------#
|
||||
.align 16
|
||||
.Lcbc_slow_prologue:
|
||||
.cfi_restore_state
|
||||
# allocate aligned stack frame...
|
||||
lea -88(%rsp),%rbp
|
||||
and \$-64,%rbp
|
||||
|
@ -1997,8 +2007,10 @@ AES_cbc_encrypt:
|
|||
sub %r10,%rbp
|
||||
|
||||
xchg %rsp,%rbp
|
||||
.cfi_def_cfa_register %rbp
|
||||
#add \$8,%rsp # reserve for return address!
|
||||
mov %rbp,$_rsp # save %rsp
|
||||
.cfi_cfa_expression $_rsp,deref,+64
|
||||
.Lcbc_slow_body:
|
||||
#mov %rdi,$_inp # save copy of inp
|
||||
#mov %rsi,$_out # save copy of out
|
||||
|
@ -2187,7 +2199,9 @@ AES_cbc_encrypt:
|
|||
.cfi_def_cfa %rsp,16
|
||||
.Lcbc_popfq:
|
||||
popfq
|
||||
.cfi_pop 49 # %rflags
|
||||
# This could be .cfi_pop 49, but libunwind fails on registers it does not
|
||||
# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.Lcbc_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
|
|
|
@ -274,6 +274,7 @@ $code.=<<___;
|
|||
.type ${PREFIX}_encrypt,\@abi-omnipotent
|
||||
.align 16
|
||||
${PREFIX}_encrypt:
|
||||
.cfi_startproc
|
||||
movups ($inp),$inout0 # load input
|
||||
mov 240($key),$rounds # key->rounds
|
||||
___
|
||||
|
@ -284,12 +285,14 @@ $code.=<<___;
|
|||
movups $inout0,($out) # output
|
||||
pxor $inout0,$inout0
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt
|
||||
|
||||
.globl ${PREFIX}_decrypt
|
||||
.type ${PREFIX}_decrypt,\@abi-omnipotent
|
||||
.align 16
|
||||
${PREFIX}_decrypt:
|
||||
.cfi_startproc
|
||||
movups ($inp),$inout0 # load input
|
||||
mov 240($key),$rounds # key->rounds
|
||||
___
|
||||
|
@ -300,6 +303,7 @@ $code.=<<___;
|
|||
movups $inout0,($out) # output
|
||||
pxor $inout0,$inout0
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt
|
||||
___
|
||||
}
|
||||
|
@ -325,6 +329,7 @@ $code.=<<___;
|
|||
.type _aesni_${dir}rypt2,\@abi-omnipotent
|
||||
.align 16
|
||||
_aesni_${dir}rypt2:
|
||||
.cfi_startproc
|
||||
$movkey ($key),$rndkey0
|
||||
shl \$4,$rounds
|
||||
$movkey 16($key),$rndkey1
|
||||
|
@ -350,6 +355,7 @@ _aesni_${dir}rypt2:
|
|||
aes${dir}last $rndkey0,$inout0
|
||||
aes${dir}last $rndkey0,$inout1
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _aesni_${dir}rypt2,.-_aesni_${dir}rypt2
|
||||
___
|
||||
}
|
||||
|
@ -361,6 +367,7 @@ $code.=<<___;
|
|||
.type _aesni_${dir}rypt3,\@abi-omnipotent
|
||||
.align 16
|
||||
_aesni_${dir}rypt3:
|
||||
.cfi_startproc
|
||||
$movkey ($key),$rndkey0
|
||||
shl \$4,$rounds
|
||||
$movkey 16($key),$rndkey1
|
||||
|
@ -391,6 +398,7 @@ _aesni_${dir}rypt3:
|
|||
aes${dir}last $rndkey0,$inout1
|
||||
aes${dir}last $rndkey0,$inout2
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3
|
||||
___
|
||||
}
|
||||
|
@ -406,6 +414,7 @@ $code.=<<___;
|
|||
.type _aesni_${dir}rypt4,\@abi-omnipotent
|
||||
.align 16
|
||||
_aesni_${dir}rypt4:
|
||||
.cfi_startproc
|
||||
$movkey ($key),$rndkey0
|
||||
shl \$4,$rounds
|
||||
$movkey 16($key),$rndkey1
|
||||
|
@ -442,6 +451,7 @@ _aesni_${dir}rypt4:
|
|||
aes${dir}last $rndkey0,$inout2
|
||||
aes${dir}last $rndkey0,$inout3
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4
|
||||
___
|
||||
}
|
||||
|
@ -453,6 +463,7 @@ $code.=<<___;
|
|||
.type _aesni_${dir}rypt6,\@abi-omnipotent
|
||||
.align 16
|
||||
_aesni_${dir}rypt6:
|
||||
.cfi_startproc
|
||||
$movkey ($key),$rndkey0
|
||||
shl \$4,$rounds
|
||||
$movkey 16($key),$rndkey1
|
||||
|
@ -503,6 +514,7 @@ _aesni_${dir}rypt6:
|
|||
aes${dir}last $rndkey0,$inout4
|
||||
aes${dir}last $rndkey0,$inout5
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _aesni_${dir}rypt6,.-_aesni_${dir}rypt6
|
||||
___
|
||||
}
|
||||
|
@ -514,6 +526,7 @@ $code.=<<___;
|
|||
.type _aesni_${dir}rypt8,\@abi-omnipotent
|
||||
.align 16
|
||||
_aesni_${dir}rypt8:
|
||||
.cfi_startproc
|
||||
$movkey ($key),$rndkey0
|
||||
shl \$4,$rounds
|
||||
$movkey 16($key),$rndkey1
|
||||
|
@ -574,6 +587,7 @@ _aesni_${dir}rypt8:
|
|||
aes${dir}last $rndkey0,$inout6
|
||||
aes${dir}last $rndkey0,$inout7
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8
|
||||
___
|
||||
}
|
||||
|
@ -598,6 +612,7 @@ $code.=<<___;
|
|||
.type aesni_ecb_encrypt,\@function,5
|
||||
.align 16
|
||||
aesni_ecb_encrypt:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
lea -0x58(%rsp),%rsp
|
||||
|
@ -943,6 +958,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
|
||||
___
|
||||
|
||||
|
|
|
@ -816,6 +816,7 @@ $code.=<<___;
|
|||
.type _bsaes_encrypt8,\@abi-omnipotent
|
||||
.align 64
|
||||
_bsaes_encrypt8:
|
||||
.cfi_startproc
|
||||
lea .LBS0(%rip), $const # constants table
|
||||
|
||||
movdqa ($key), @XMM[9] # round 0 key
|
||||
|
@ -875,11 +876,13 @@ $code.=<<___;
|
|||
pxor @XMM[8], @XMM[0]
|
||||
pxor @XMM[8], @XMM[1]
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _bsaes_encrypt8,.-_bsaes_encrypt8
|
||||
|
||||
.type _bsaes_decrypt8,\@abi-omnipotent
|
||||
.align 64
|
||||
_bsaes_decrypt8:
|
||||
.cfi_startproc
|
||||
lea .LBS0(%rip), $const # constants table
|
||||
|
||||
movdqa ($key), @XMM[9] # round 0 key
|
||||
|
@ -937,6 +940,7 @@ $code.=<<___;
|
|||
pxor @XMM[8], @XMM[0]
|
||||
pxor @XMM[8], @XMM[1]
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _bsaes_decrypt8,.-_bsaes_decrypt8
|
||||
___
|
||||
}
|
||||
|
@ -971,6 +975,7 @@ $code.=<<___;
|
|||
.type _bsaes_key_convert,\@abi-omnipotent
|
||||
.align 16
|
||||
_bsaes_key_convert:
|
||||
.cfi_startproc
|
||||
lea .Lmasks(%rip), $const
|
||||
movdqu ($inp), %xmm7 # load round 0 key
|
||||
lea 0x10($inp), $inp
|
||||
|
@ -1049,6 +1054,7 @@ _bsaes_key_convert:
|
|||
movdqa 0x50($const), %xmm7 # .L63
|
||||
#movdqa %xmm6, ($out) # don't save last round key
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _bsaes_key_convert,.-_bsaes_key_convert
|
||||
___
|
||||
}
|
||||
|
|
|
@ -91,6 +91,7 @@ $code.=<<___;
|
|||
.type _vpaes_encrypt_core,\@abi-omnipotent
|
||||
.align 16
|
||||
_vpaes_encrypt_core:
|
||||
.cfi_startproc
|
||||
mov %rdx, %r9
|
||||
mov \$16, %r11
|
||||
mov 240(%rdx),%eax
|
||||
|
@ -171,6 +172,7 @@ _vpaes_encrypt_core:
|
|||
pxor %xmm4, %xmm0 # 0 = A
|
||||
pshufb %xmm1, %xmm0
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
|
||||
|
||||
##
|
||||
|
@ -181,6 +183,7 @@ _vpaes_encrypt_core:
|
|||
.type _vpaes_decrypt_core,\@abi-omnipotent
|
||||
.align 16
|
||||
_vpaes_decrypt_core:
|
||||
.cfi_startproc
|
||||
mov %rdx, %r9 # load key
|
||||
mov 240(%rdx),%eax
|
||||
movdqa %xmm9, %xmm1
|
||||
|
@ -277,6 +280,7 @@ _vpaes_decrypt_core:
|
|||
pxor %xmm4, %xmm0 # 0 = A
|
||||
pshufb %xmm2, %xmm0
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
|
||||
|
||||
########################################################
|
||||
|
@ -287,6 +291,7 @@ _vpaes_decrypt_core:
|
|||
.type _vpaes_schedule_core,\@abi-omnipotent
|
||||
.align 16
|
||||
_vpaes_schedule_core:
|
||||
.cfi_startproc
|
||||
# rdi = key
|
||||
# rsi = size in bits
|
||||
# rdx = buffer
|
||||
|
@ -453,6 +458,7 @@ _vpaes_schedule_core:
|
|||
pxor %xmm6, %xmm6
|
||||
pxor %xmm7, %xmm7
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_core,.-_vpaes_schedule_core
|
||||
|
||||
##
|
||||
|
@ -472,6 +478,7 @@ _vpaes_schedule_core:
|
|||
.type _vpaes_schedule_192_smear,\@abi-omnipotent
|
||||
.align 16
|
||||
_vpaes_schedule_192_smear:
|
||||
.cfi_startproc
|
||||
pshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0
|
||||
pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
|
||||
pxor %xmm1, %xmm6 # -> c+d c 0 0
|
||||
|
@ -480,6 +487,7 @@ _vpaes_schedule_192_smear:
|
|||
movdqa %xmm6, %xmm0
|
||||
movhlps %xmm1, %xmm6 # clobber low side with zeros
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
|
||||
|
||||
##
|
||||
|
@ -503,6 +511,7 @@ _vpaes_schedule_192_smear:
|
|||
.type _vpaes_schedule_round,\@abi-omnipotent
|
||||
.align 16
|
||||
_vpaes_schedule_round:
|
||||
.cfi_startproc
|
||||
# extract rcon from xmm8
|
||||
pxor %xmm1, %xmm1
|
||||
palignr \$15, %xmm8, %xmm1
|
||||
|
@ -556,6 +565,7 @@ _vpaes_schedule_low_round:
|
|||
pxor %xmm7, %xmm0
|
||||
movdqa %xmm0, %xmm7
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_round,.-_vpaes_schedule_round
|
||||
|
||||
##
|
||||
|
@ -570,6 +580,7 @@ _vpaes_schedule_low_round:
|
|||
.type _vpaes_schedule_transform,\@abi-omnipotent
|
||||
.align 16
|
||||
_vpaes_schedule_transform:
|
||||
.cfi_startproc
|
||||
movdqa %xmm9, %xmm1
|
||||
pandn %xmm0, %xmm1
|
||||
psrld \$4, %xmm1
|
||||
|
@ -580,6 +591,7 @@ _vpaes_schedule_transform:
|
|||
pshufb %xmm1, %xmm0
|
||||
pxor %xmm2, %xmm0
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
|
||||
|
||||
##
|
||||
|
@ -608,6 +620,7 @@ _vpaes_schedule_transform:
|
|||
.type _vpaes_schedule_mangle,\@abi-omnipotent
|
||||
.align 16
|
||||
_vpaes_schedule_mangle:
|
||||
.cfi_startproc
|
||||
movdqa %xmm0, %xmm4 # save xmm0 for later
|
||||
movdqa .Lk_mc_forward(%rip),%xmm5
|
||||
test %rcx, %rcx
|
||||
|
@ -672,6 +685,7 @@ _vpaes_schedule_mangle:
|
|||
and \$0x30, %r8
|
||||
movdqu %xmm3, (%rdx)
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
|
||||
|
||||
#
|
||||
|
@ -681,6 +695,7 @@ _vpaes_schedule_mangle:
|
|||
.type ${PREFIX}_set_encrypt_key,\@function,3
|
||||
.align 16
|
||||
${PREFIX}_set_encrypt_key:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
lea -0xb8(%rsp),%rsp
|
||||
|
@ -723,12 +738,14 @@ ___
|
|||
$code.=<<___;
|
||||
xor %eax,%eax
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
|
||||
|
||||
.globl ${PREFIX}_set_decrypt_key
|
||||
.type ${PREFIX}_set_decrypt_key,\@function,3
|
||||
.align 16
|
||||
${PREFIX}_set_decrypt_key:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
lea -0xb8(%rsp),%rsp
|
||||
|
@ -776,12 +793,14 @@ ___
|
|||
$code.=<<___;
|
||||
xor %eax,%eax
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
|
||||
|
||||
.globl ${PREFIX}_encrypt
|
||||
.type ${PREFIX}_encrypt,\@function,3
|
||||
.align 16
|
||||
${PREFIX}_encrypt:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
lea -0xb8(%rsp),%rsp
|
||||
|
@ -819,12 +838,14 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt
|
||||
|
||||
.globl ${PREFIX}_decrypt
|
||||
.type ${PREFIX}_decrypt,\@function,3
|
||||
.align 16
|
||||
${PREFIX}_decrypt:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
lea -0xb8(%rsp),%rsp
|
||||
|
@ -862,6 +883,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size ${PREFIX}_decrypt,.-${PREFIX}_decrypt
|
||||
___
|
||||
{
|
||||
|
@ -874,6 +896,7 @@ $code.=<<___;
|
|||
.type ${PREFIX}_cbc_encrypt,\@function,6
|
||||
.align 16
|
||||
${PREFIX}_cbc_encrypt:
|
||||
.cfi_startproc
|
||||
xchg $key,$len
|
||||
___
|
||||
($len,$key)=($key,$len);
|
||||
|
@ -944,6 +967,7 @@ ___
|
|||
$code.=<<___;
|
||||
.Lcbc_abort:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
|
||||
___
|
||||
}
|
||||
|
@ -957,6 +981,7 @@ $code.=<<___;
|
|||
.type _vpaes_preheat,\@abi-omnipotent
|
||||
.align 16
|
||||
_vpaes_preheat:
|
||||
.cfi_startproc
|
||||
lea .Lk_s0F(%rip), %r10
|
||||
movdqa -0x20(%r10), %xmm10 # .Lk_inv
|
||||
movdqa -0x10(%r10), %xmm11 # .Lk_inv+16
|
||||
|
@ -966,6 +991,7 @@ _vpaes_preheat:
|
|||
movdqa 0x50(%r10), %xmm15 # .Lk_sb2
|
||||
movdqa 0x60(%r10), %xmm14 # .Lk_sb2+16
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _vpaes_preheat,.-_vpaes_preheat
|
||||
########################################################
|
||||
## ##
|
||||
|
|
|
@ -1492,6 +1492,7 @@ $code.=<<___;
|
|||
.type rsaz_1024_red2norm_avx2,\@abi-omnipotent
|
||||
.align 32
|
||||
rsaz_1024_red2norm_avx2:
|
||||
.cfi_startproc
|
||||
sub \$-128,$inp # size optimization
|
||||
xor %rax,%rax
|
||||
___
|
||||
|
@ -1525,12 +1526,14 @@ ___
|
|||
}
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2
|
||||
|
||||
.globl rsaz_1024_norm2red_avx2
|
||||
.type rsaz_1024_norm2red_avx2,\@abi-omnipotent
|
||||
.align 32
|
||||
rsaz_1024_norm2red_avx2:
|
||||
.cfi_startproc
|
||||
sub \$-128,$out # size optimization
|
||||
mov ($inp),@T[0]
|
||||
mov \$0x1fffffff,%eax
|
||||
|
@ -1562,6 +1565,7 @@ $code.=<<___;
|
|||
mov @T[0],`8*($j+2)-128`($out)
|
||||
mov @T[0],`8*($j+3)-128`($out)
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
|
||||
___
|
||||
}
|
||||
|
@ -1573,6 +1577,7 @@ $code.=<<___;
|
|||
.type rsaz_1024_scatter5_avx2,\@abi-omnipotent
|
||||
.align 32
|
||||
rsaz_1024_scatter5_avx2:
|
||||
.cfi_startproc
|
||||
vzeroupper
|
||||
vmovdqu .Lscatter_permd(%rip),%ymm5
|
||||
shl \$4,$power
|
||||
|
@ -1592,6 +1597,7 @@ rsaz_1024_scatter5_avx2:
|
|||
|
||||
vzeroupper
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2
|
||||
|
||||
.globl rsaz_1024_gather5_avx2
|
||||
|
|
|
@ -2910,6 +2910,7 @@ bn_powerx5:
|
|||
.align 32
|
||||
bn_sqrx8x_internal:
|
||||
__bn_sqrx8x_internal:
|
||||
.cfi_startproc
|
||||
##################################################################
|
||||
# Squaring part:
|
||||
#
|
||||
|
@ -3542,6 +3543,7 @@ __bn_sqrx8x_reduction:
|
|||
cmp 8+8(%rsp),%r8 # end of t[]?
|
||||
jb .Lsqrx8x_reduction_loop
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
|
||||
___
|
||||
}
|
||||
|
|
|
@ -1674,6 +1674,7 @@ $code.=<<___;
|
|||
.type __ecp_nistz256_mul_montq,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_mul_montq:
|
||||
.cfi_startproc
|
||||
########################################################################
|
||||
# Multiply a by b[0]
|
||||
mov %rax, $t1
|
||||
|
@ -1885,6 +1886,7 @@ __ecp_nistz256_mul_montq:
|
|||
mov $acc1, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
|
||||
|
||||
################################################################################
|
||||
|
@ -1968,6 +1970,7 @@ $code.=<<___;
|
|||
.type __ecp_nistz256_sqr_montq,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_sqr_montq:
|
||||
.cfi_startproc
|
||||
mov %rax, $acc5
|
||||
mulq $acc6 # a[1]*a[0]
|
||||
mov %rax, $acc1
|
||||
|
@ -2125,6 +2128,7 @@ __ecp_nistz256_sqr_montq:
|
|||
mov $acc7, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
|
||||
___
|
||||
|
||||
|
@ -2133,6 +2137,7 @@ $code.=<<___;
|
|||
.type __ecp_nistz256_mul_montx,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_mul_montx:
|
||||
.cfi_startproc
|
||||
########################################################################
|
||||
# Multiply by b[0]
|
||||
mulx $acc1, $acc0, $acc1
|
||||
|
@ -2295,11 +2300,13 @@ __ecp_nistz256_mul_montx:
|
|||
mov $acc1, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
|
||||
|
||||
.type __ecp_nistz256_sqr_montx,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_sqr_montx:
|
||||
.cfi_startproc
|
||||
mulx $acc6, $acc1, $acc2 # a[0]*a[1]
|
||||
mulx $acc7, $t0, $acc3 # a[0]*a[2]
|
||||
xor %eax, %eax
|
||||
|
@ -2423,6 +2430,7 @@ __ecp_nistz256_sqr_montx:
|
|||
mov $acc7, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
|
||||
___
|
||||
}
|
||||
|
@ -2578,6 +2586,7 @@ ecp_nistz256_scatter_w5:
|
|||
.type ecp_nistz256_gather_w5,\@abi-omnipotent
|
||||
.align 32
|
||||
ecp_nistz256_gather_w5:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($avx>1);
|
||||
mov OPENSSL_ia32cap_P+8(%rip), %eax
|
||||
|
@ -2666,6 +2675,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LSEH_end_ecp_nistz256_gather_w5:
|
||||
.size ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5
|
||||
|
||||
|
@ -2694,6 +2704,7 @@ ecp_nistz256_scatter_w7:
|
|||
.type ecp_nistz256_gather_w7,\@abi-omnipotent
|
||||
.align 32
|
||||
ecp_nistz256_gather_w7:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($avx>1);
|
||||
mov OPENSSL_ia32cap_P+8(%rip), %eax
|
||||
|
@ -2771,6 +2782,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LSEH_end_ecp_nistz256_gather_w7:
|
||||
.size ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7
|
||||
___
|
||||
|
@ -2787,6 +2799,7 @@ $code.=<<___;
|
|||
.type ecp_nistz256_avx2_gather_w5,\@abi-omnipotent
|
||||
.align 32
|
||||
ecp_nistz256_avx2_gather_w5:
|
||||
.cfi_startproc
|
||||
.Lavx2_gather_w5:
|
||||
vzeroupper
|
||||
___
|
||||
|
@ -2874,6 +2887,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LSEH_end_ecp_nistz256_avx2_gather_w5:
|
||||
.size ecp_nistz256_avx2_gather_w5,.-ecp_nistz256_avx2_gather_w5
|
||||
___
|
||||
|
@ -2893,6 +2907,7 @@ $code.=<<___;
|
|||
.type ecp_nistz256_avx2_gather_w7,\@abi-omnipotent
|
||||
.align 32
|
||||
ecp_nistz256_avx2_gather_w7:
|
||||
.cfi_startproc
|
||||
.Lavx2_gather_w7:
|
||||
vzeroupper
|
||||
___
|
||||
|
@ -2995,6 +3010,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LSEH_end_ecp_nistz256_avx2_gather_w7:
|
||||
.size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7
|
||||
___
|
||||
|
@ -3064,6 +3080,7 @@ $code.=<<___;
|
|||
.type __ecp_nistz256_add_toq,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_add_toq:
|
||||
.cfi_startproc
|
||||
xor $t4,$t4
|
||||
add 8*0($b_ptr), $a0
|
||||
adc 8*1($b_ptr), $a1
|
||||
|
@ -3091,11 +3108,13 @@ __ecp_nistz256_add_toq:
|
|||
mov $a3, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
|
||||
|
||||
.type __ecp_nistz256_sub_fromq,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_sub_fromq:
|
||||
.cfi_startproc
|
||||
sub 8*0($b_ptr), $a0
|
||||
sbb 8*1($b_ptr), $a1
|
||||
mov $a0, $t0
|
||||
|
@ -3122,11 +3141,13 @@ __ecp_nistz256_sub_fromq:
|
|||
mov $a3, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
|
||||
|
||||
.type __ecp_nistz256_subq,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_subq:
|
||||
.cfi_startproc
|
||||
sub $a0, $t0
|
||||
sbb $a1, $t1
|
||||
mov $t0, $a0
|
||||
|
@ -3149,11 +3170,13 @@ __ecp_nistz256_subq:
|
|||
cmovnz $t3, $a3
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_subq,.-__ecp_nistz256_subq
|
||||
|
||||
.type __ecp_nistz256_mul_by_2q,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_mul_by_2q:
|
||||
.cfi_startproc
|
||||
xor $t4, $t4
|
||||
add $a0, $a0 # a0:a3+a0:a3
|
||||
adc $a1, $a1
|
||||
|
@ -3181,6 +3204,7 @@ __ecp_nistz256_mul_by_2q:
|
|||
mov $a3, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
|
||||
___
|
||||
}
|
||||
|
@ -3620,7 +3644,9 @@ $code.=<<___;
|
|||
movq %xmm1, $a_ptr # restore $a_ptr
|
||||
movq %xmm0, $r_ptr # restore $r_ptr
|
||||
add \$`32*(18-5)`, %rsp # difference in frame sizes
|
||||
.cfi_adjust_cfa_offset `-32*(18-5)`
|
||||
jmp .Lpoint_double_shortcut$x
|
||||
.cfi_adjust_cfa_offset `32*(18-5)`
|
||||
|
||||
.align 32
|
||||
.Ladd_proceed$x:
|
||||
|
@ -4156,6 +4182,7 @@ $code.=<<___;
|
|||
.type __ecp_nistz256_add_tox,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_add_tox:
|
||||
.cfi_startproc
|
||||
xor $t4, $t4
|
||||
adc 8*0($b_ptr), $a0
|
||||
adc 8*1($b_ptr), $a1
|
||||
|
@ -4184,11 +4211,13 @@ __ecp_nistz256_add_tox:
|
|||
mov $a3, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
|
||||
|
||||
.type __ecp_nistz256_sub_fromx,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_sub_fromx:
|
||||
.cfi_startproc
|
||||
xor $t4, $t4
|
||||
sbb 8*0($b_ptr), $a0
|
||||
sbb 8*1($b_ptr), $a1
|
||||
|
@ -4217,11 +4246,13 @@ __ecp_nistz256_sub_fromx:
|
|||
mov $a3, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
|
||||
|
||||
.type __ecp_nistz256_subx,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_subx:
|
||||
.cfi_startproc
|
||||
xor $t4, $t4
|
||||
sbb $a0, $t0
|
||||
sbb $a1, $t1
|
||||
|
@ -4246,11 +4277,13 @@ __ecp_nistz256_subx:
|
|||
cmovc $t3, $a3
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_subx,.-__ecp_nistz256_subx
|
||||
|
||||
.type __ecp_nistz256_mul_by_2x,\@abi-omnipotent
|
||||
.align 32
|
||||
__ecp_nistz256_mul_by_2x:
|
||||
.cfi_startproc
|
||||
xor $t4, $t4
|
||||
adc $a0, $a0 # a0:a3+a0:a3
|
||||
adc $a1, $a1
|
||||
|
@ -4279,6 +4312,7 @@ __ecp_nistz256_mul_by_2x:
|
|||
mov $a3, 8*3($r_ptr)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
|
||||
___
|
||||
}
|
||||
|
|
|
@ -529,6 +529,7 @@ $code.=<<___;
|
|||
.type gcm_init_clmul,\@abi-omnipotent
|
||||
.align 16
|
||||
gcm_init_clmul:
|
||||
.cfi_startproc
|
||||
.L_init_clmul:
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
|
@ -598,6 +599,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size gcm_init_clmul,.-gcm_init_clmul
|
||||
___
|
||||
}
|
||||
|
@ -609,6 +611,7 @@ $code.=<<___;
|
|||
.type gcm_gmult_clmul,\@abi-omnipotent
|
||||
.align 16
|
||||
gcm_gmult_clmul:
|
||||
.cfi_startproc
|
||||
.L_gmult_clmul:
|
||||
movdqu ($Xip),$Xi
|
||||
movdqa .Lbswap_mask(%rip),$T3
|
||||
|
@ -645,6 +648,7 @@ $code.=<<___;
|
|||
pshufb $T3,$Xi
|
||||
movdqu $Xi,($Xip)
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size gcm_gmult_clmul,.-gcm_gmult_clmul
|
||||
___
|
||||
}
|
||||
|
@ -658,6 +662,7 @@ $code.=<<___;
|
|||
.type gcm_ghash_clmul,\@abi-omnipotent
|
||||
.align 32
|
||||
gcm_ghash_clmul:
|
||||
.cfi_startproc
|
||||
.L_ghash_clmul:
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
|
@ -1005,6 +1010,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size gcm_ghash_clmul,.-gcm_ghash_clmul
|
||||
___
|
||||
}
|
||||
|
@ -1014,6 +1020,7 @@ $code.=<<___;
|
|||
.type gcm_init_avx,\@abi-omnipotent
|
||||
.align 32
|
||||
gcm_init_avx:
|
||||
.cfi_startproc
|
||||
___
|
||||
if ($avx) {
|
||||
my ($Htbl,$Xip)=@_4args;
|
||||
|
@ -1142,6 +1149,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size gcm_init_avx,.-gcm_init_avx
|
||||
___
|
||||
} else {
|
||||
|
@ -1156,7 +1164,9 @@ $code.=<<___;
|
|||
.type gcm_gmult_avx,\@abi-omnipotent
|
||||
.align 32
|
||||
gcm_gmult_avx:
|
||||
.cfi_startproc
|
||||
jmp .L_gmult_clmul
|
||||
.cfi_endproc
|
||||
.size gcm_gmult_avx,.-gcm_gmult_avx
|
||||
___
|
||||
|
||||
|
@ -1165,6 +1175,7 @@ $code.=<<___;
|
|||
.type gcm_ghash_avx,\@abi-omnipotent
|
||||
.align 32
|
||||
gcm_ghash_avx:
|
||||
.cfi_startproc
|
||||
___
|
||||
if ($avx) {
|
||||
my ($Xip,$Htbl,$inp,$len)=@_4args;
|
||||
|
@ -1577,6 +1588,7 @@ $code.=<<___ if ($win64);
|
|||
___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size gcm_ghash_avx,.-gcm_ghash_avx
|
||||
___
|
||||
} else {
|
||||
|
|
|
@ -541,6 +541,7 @@ my %globals;
|
|||
);
|
||||
|
||||
my ($cfa_reg, $cfa_rsp);
|
||||
my @cfa_stack;
|
||||
|
||||
# [us]leb128 format is variable-length integer representation base
|
||||
# 128 (2^7), with most significant bit of each byte being 0 denoting
|
||||
|
@ -688,6 +689,14 @@ my %globals;
|
|||
cfa_expression($$line)));
|
||||
last;
|
||||
};
|
||||
/remember_state/
|
||||
&& do { push @cfa_stack, [$cfa_reg, $cfa_rsp];
|
||||
last;
|
||||
};
|
||||
/restore_state/
|
||||
&& do { ($cfa_reg, $cfa_rsp) = @{pop @cfa_stack};
|
||||
last;
|
||||
};
|
||||
}
|
||||
|
||||
$self->{value} = ".cfi_$dir\t$$line" if ($dir);
|
||||
|
|
Loading…
Reference in a new issue