bsaes-x86_64.pl: add Win64 SEH and "hadrware" calls to aes-x86_64.pl.

This commit is contained in:
Andy Polyakov 2011-11-13 20:33:41 +00:00
parent 0985bd4f80
commit fe06864836
2 changed files with 216 additions and 26 deletions

View file

@ -588,6 +588,9 @@ $code.=<<___;
.globl AES_encrypt
.type AES_encrypt,\@function,3
.align 16
.globl asm_AES_encrypt
.hidden asm_AES_encrypt
asm_AES_encrypt:
AES_encrypt:
push %rbx
push %rbp
@ -1184,6 +1187,9 @@ $code.=<<___;
.globl AES_decrypt
.type AES_decrypt,\@function,3
.align 16
.globl asm_AES_decrypt
.hidden asm_AES_decrypt
asm_AES_decrypt:
AES_decrypt:
push %rbx
push %rbp
@ -1644,6 +1650,9 @@ $code.=<<___;
.type AES_cbc_encrypt,\@function,6
.align 16
.extern OPENSSL_ia32cap_P
.globl asm_AES_cbc_encrypt
.hidden asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
AES_cbc_encrypt:
cmp \$0,%rdx # check length
je .Lcbc_epilogue

View file

@ -88,8 +88,8 @@
#
# November 2011.
#
# Add bsaes_xts_[en|de]crypt. Small-block performance is suboptimal,
# but XTS is meant to be used with larger blocks...
# Add bsaes_xts_[en|de]crypt. Less-than-80-bytes-block performance is
# suboptimal, but XTS is meant to be used with larger blocks...
#
# <appro@openssl.org>
@ -108,6 +108,7 @@ open STDOUT,"| $^X $xlate $flavour $output";
my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx");
my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15)
my $ecb=0; # suppress unreferenced ECB subroutines, spare some space...
{
my ($key,$rounds,$const)=("%rax","%r10d","%r11");
@ -743,8 +744,8 @@ ___
$code.=<<___;
.text
.extern AES_encrypt
.extern AES_decrypt
.extern asm_AES_encrypt
.extern asm_AES_decrypt
.type _bsaes_encrypt8,\@abi-omnipotent
.align 64
@ -950,7 +951,7 @@ $code.=<<___;
___
}
if (1 && !$win64) { # following four functions are unsupported interface
if (0 && !$win64) { # following four functions are unsupported interface
# used for benchmarking...
$code.=<<___;
.globl bsaes_enc_key_convert
@ -1056,12 +1057,14 @@ my ($arg1,$arg2,$arg3,$arg4,$arg5,$arg6)=$win64 ? ("%rcx","%rdx","%r8","%r9","%r
: ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d");
my ($inp,$out,$len,$key)=("%r12","%r13","%r14","%r15");
if (0) { # suppress unreferenced ECB subroutines, spare some space...
if ($ecb) {
$code.=<<___;
.globl bsaes_ecb_encrypt_blocks
.type bsaes_ecb_encrypt_blocks,\@abi-omnipotent
.align 16
bsaes_ecb_encrypt_blocks:
mov %rsp, %rax
.Lecb_enc_prologue:
push %rbp
push %rbx
push %r12
@ -1213,7 +1216,7 @@ $code.=<<___;
lea ($inp), $arg1
lea ($out), $arg2
lea ($key), $arg3
call AES_encrypt
call asm_AES_encrypt
lea 16($inp), $inp
lea 16($out), $out
dec $len
@ -1250,8 +1253,9 @@ $code.=<<___;
mov 0x58(%rsp), %r13
mov 0x60(%rsp), %r12
mov 0x68(%rsp), %rbx
mov 0x70(%rsp), %rbp
mov 0x70(%rsp), %rax
lea 0x78(%rsp), %rsp
mov %rax, %rbp
.Lecb_enc_epilogue:
ret
.size bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks
@ -1260,6 +1264,8 @@ $code.=<<___;
.type bsaes_ecb_decrypt_blocks,\@abi-omnipotent
.align 16
bsaes_ecb_decrypt_blocks:
mov %rsp, %rax
.Lecb_dec_prologue:
push %rbp
push %rbx
push %r12
@ -1412,7 +1418,7 @@ $code.=<<___;
lea ($inp), $arg1
lea ($out), $arg2
lea ($key), $arg3
call AES_decrypt
call asm_AES_decrypt
lea 16($inp), $inp
lea 16($out), $out
dec $len
@ -1449,15 +1455,16 @@ $code.=<<___;
mov 0x58(%rsp), %r13
mov 0x60(%rsp), %r12
mov 0x68(%rsp), %rbx
mov 0x70(%rsp), %rbp
mov 0x70(%rsp), %rax
lea 0x78(%rsp), %rsp
mov %rax, %rbp
.Lecb_dec_epilogue:
ret
.size bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks
___
}
$code.=<<___;
.extern AES_cbc_encrypt
.extern asm_AES_cbc_encrypt
.globl bsaes_cbc_encrypt
.type bsaes_cbc_encrypt,\@abi-omnipotent
.align 16
@ -1468,10 +1475,12 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
cmp \$0,$arg6
jne AES_cbc_encrypt
jne asm_AES_cbc_encrypt
cmp \$128,$arg3
jb AES_cbc_encrypt
jb asm_AES_cbc_encrypt
mov %rsp, %rax
.Lcbc_dec_prologue:
push %rbp
push %rbx
push %r12
@ -1699,7 +1708,7 @@ $code.=<<___;
lea ($inp), $arg1
lea 0x20(%rbp), $arg2 # buffer output
lea ($key), $arg3
call AES_decrypt # doesn't touch %xmm
call asm_AES_decrypt # doesn't touch %xmm
pxor 0x20(%rbp), @XMM[15] # ^= IV
movdqu @XMM[15], ($out) # write output
movdqa @XMM[0], @XMM[15] # IV
@ -1736,8 +1745,9 @@ $code.=<<___;
mov 0x58(%rsp), %r13
mov 0x60(%rsp), %r12
mov 0x68(%rsp), %rbx
mov 0x70(%rsp), %rbp
mov 0x70(%rsp), %rax
lea 0x78(%rsp), %rsp
mov %rax, %rbp
.Lcbc_dec_epilogue:
ret
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
@ -1746,6 +1756,8 @@ $code.=<<___;
.type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent
.align 16
bsaes_ctr32_encrypt_blocks:
mov %rsp, %rax
.Lctr_enc_prologue:
push %rbp
push %rbx
push %r12
@ -1919,7 +1931,7 @@ $code.=<<___;
lea 0x20(%rbp), $arg1
lea 0x30(%rbp), $arg2
lea ($key), $arg3
call AES_encrypt
call asm_AES_encrypt
movdqu ($inp), @XMM[1]
lea 16($inp), $inp
mov 0x2c(%rbp), %eax # load 32-bit counter
@ -1964,8 +1976,9 @@ $code.=<<___;
mov 0x58(%rsp), %r13
mov 0x60(%rsp), %r12
mov 0x68(%rsp), %rbx
mov 0x70(%rsp), %rbp
mov 0x70(%rsp), %rax
lea 0x78(%rsp), %rsp
mov %rax, %rbp
.Lctr_enc_epilogue:
ret
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
@ -1981,6 +1994,8 @@ $code.=<<___;
.type bsaes_xts_encrypt,\@abi-omnipotent
.align 16
bsaes_xts_encrypt:
mov %rsp, %rax
.Lxts_enc_prologue:
push %rbp
push %rbx
push %r12
@ -2015,7 +2030,7 @@ $code.=<<___;
lea ($arg6), $arg1
lea 0x20(%rbp), $arg2
lea ($arg5), $arg3
call AES_encrypt # generate initial tweak
call asm_AES_encrypt # generate initial tweak
mov 240($key), %eax # rounds
mov $len, %rbx # backup $len
@ -2281,7 +2296,7 @@ $code.=<<___;
lea 0x20(%rbp), $arg1
lea 0x20(%rbp), $arg2
lea ($key), $arg3
call AES_encrypt # doesn't touch %xmm
call asm_AES_encrypt # doesn't touch %xmm
pxor 0x20(%rbp), @XMM[0] # ^= tweak[]
#pxor @XMM[8], @XMM[0]
#lea 0x80(%rsp), %rax # pass key schedule
@ -2314,7 +2329,7 @@ $code.=<<___;
lea 0x20(%rbp), $arg2
movdqa @XMM[0], 0x20(%rbp)
lea ($key), $arg3
call AES_encrypt # doesn't touch %xmm
call asm_AES_encrypt # doesn't touch %xmm
pxor 0x20(%rbp), @XMM[7]
movdqu @XMM[7], -16($out)
@ -2349,8 +2364,9 @@ $code.=<<___;
mov 0x58(%rsp), %r13
mov 0x60(%rsp), %r12
mov 0x68(%rsp), %rbx
mov 0x70(%rsp), %rbp
mov 0x70(%rsp), %rax
lea 0x78(%rsp), %rsp
mov %rax, %rbp
.Lxts_enc_epilogue:
ret
.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
@ -2359,6 +2375,8 @@ $code.=<<___;
.type bsaes_xts_decrypt,\@abi-omnipotent
.align 16
bsaes_xts_decrypt:
mov %rsp, %rax
.Lxts_dec_prologue:
push %rbp
push %rbx
push %r12
@ -2393,7 +2411,7 @@ $code.=<<___;
lea ($arg6), $arg1
lea 0x20(%rbp), $arg2
lea ($arg5), $arg3
call AES_encrypt # generate initial tweak
call asm_AES_encrypt # generate initial tweak
mov 240($key), %eax # rounds
mov $len, %rbx # backup $len
@ -2666,7 +2684,7 @@ $code.=<<___;
lea 0x20(%rbp), $arg1
lea 0x20(%rbp), $arg2
lea ($key), $arg3
call AES_decrypt # doesn't touch %xmm
call asm_AES_decrypt # doesn't touch %xmm
pxor 0x20(%rbp), @XMM[0] # ^= tweak[]
#pxor @XMM[8], @XMM[0]
#lea 0x80(%rsp), %rax # pass key schedule
@ -2697,7 +2715,7 @@ $code.=<<___;
lea 0x20(%rbp), $arg2
movdqa @XMM[0], 0x20(%rbp)
lea ($key), $arg3
call AES_decrypt # doesn't touch %xmm
call asm_AES_decrypt # doesn't touch %xmm
pxor 0x20(%rbp), @XMM[7]
mov $out, %rdx
movdqu @XMM[7], ($out)
@ -2718,7 +2736,7 @@ $code.=<<___;
lea 0x20(%rbp), $arg2
movdqa @XMM[0], 0x20(%rbp)
lea ($key), $arg3
call AES_decrypt # doesn't touch %xmm
call asm_AES_decrypt # doesn't touch %xmm
pxor 0x20(%rbp), @XMM[6]
movdqu @XMM[6], ($out)
@ -2753,8 +2771,9 @@ $code.=<<___;
mov 0x58(%rsp), %r13
mov 0x60(%rsp), %r12
mov 0x68(%rsp), %rbx
mov 0x70(%rsp), %rbp
mov 0x70(%rsp), %rax
lea 0x78(%rsp), %rsp
mov %rax, %rbp
.Lxts_dec_epilogue:
ret
.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
@ -2815,6 +2834,168 @@ _bsaes_const:
.size _bsaes_const,.-_bsaes_const
___
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
# CONTEXT *context,DISPATCHER_CONTEXT *disp)
if ($win64) {
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";
$code.=<<___;
.extern __imp_RtlVirtualUnwind
.type se_handler,\@abi-omnipotent
.align 16
se_handler:
push %rsi
push %rdi
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
pushfq
sub \$64,%rsp
mov 120($context),%rax # pull context->Rax
mov 248($context),%rbx # pull context->Rip
mov 8($disp),%rsi # disp->ImageBase
mov 56($disp),%r11 # disp->HandlerData
mov 0(%r11),%r10d # HandlerData[0]
lea (%rsi,%r10),%r10 # prologue label
cmp %r10,%rbx # context->Rip<prologue label
jb .Lin_prologue
mov 152($context),%rax # pull context->Rsp
mov 4(%r11),%r10d # HandlerData[1]
lea (%rsi,%r10),%r10 # epilogue label
cmp %r10,%rbx # context->Rip>=epilogue label
jae .Lin_prologue
mov 160($context),%rax # pull context->Rbp
lea 0x40(%rax),%rsi # %xmm save area
lea 512($context),%rdi # &context.Xmm6
mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
.long 0xa548f3fc # cld; rep movsq
lea 0xa0(%rax),%rax # adjust stack pointer
mov 0x70(%rax),%rbp
mov 0x68(%rax),%rbx
mov 0x60(%rax),%r12
mov 0x58(%rax),%r13
mov 0x50(%rax),%r14
mov 0x48(%rax),%r15
lea 0x78(%rax),%rax # adjust stack pointer
mov %rbx,144($context) # restore context->Rbx
mov %rbp,160($context) # restore context->Rbp
mov %r12,216($context) # restore context->R12
mov %r13,224($context) # restore context->R13
mov %r14,232($context) # restore context->R14
mov %r15,240($context) # restore context->R15
.Lin_prologue:
mov %rax,152($context) # restore context->Rsp
mov 40($disp),%rdi # disp->ContextRecord
mov $context,%rsi # context
mov \$`1232/8`,%ecx # sizeof(CONTEXT)
.long 0xa548f3fc # cld; rep movsq
mov $disp,%rsi
xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
mov 8(%rsi),%rdx # arg2, disp->ImageBase
mov 0(%rsi),%r8 # arg3, disp->ControlPc
mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
mov 40(%rsi),%r10 # disp->ContextRecord
lea 56(%rsi),%r11 # &disp->HandlerData
lea 24(%rsi),%r12 # &disp->EstablisherFrame
mov %r10,32(%rsp) # arg5
mov %r11,40(%rsp) # arg6
mov %r12,48(%rsp) # arg7
mov %rcx,56(%rsp) # arg8, (NULL)
call *__imp_RtlVirtualUnwind(%rip)
mov \$1,%eax # ExceptionContinueSearch
add \$64,%rsp
popfq
pop %r15
pop %r14
pop %r13
pop %r12
pop %rbp
pop %rbx
pop %rdi
pop %rsi
ret
.size se_handler,.-se_handler
.section .pdata
.align 4
___
$code.=<<___ if ($ecb);
.rva .Lecb_enc_prologue
.rva .Lecb_enc_epilogue
.rva .Lecb_enc_info
.rva .Lecb_dec_prologue
.rva .Lecb_dec_epilogue
.rva .Lecb_dec_info
___
$code.=<<___;
.rva .Lcbc_dec_prologue
.rva .Lcbc_dec_epilogue
.rva .Lcbc_dec_info
.rva .Lctr_enc_prologue
.rva .Lctr_enc_epilogue
.rva .Lctr_enc_info
.rva .Lxts_enc_prologue
.rva .Lxts_enc_epilogue
.rva .Lxts_enc_info
.rva .Lxts_dec_prologue
.rva .Lxts_dec_epilogue
.rva .Lxts_dec_info
.section .xdata
.align 8
___
$code.=<<___ if ($ecb);
.Lecb_enc_info:
.byte 9,0,0,0
.rva se_handler
.rva .Lecb_enc_body,.Lecb_enc_epilogue # HandlerData[]
.Lecb_dec_info:
.byte 9,0,0,0
.rva se_handler
.rva .Lecb_dec_body,.Lecb_dec_epilogue # HandlerData[]
___
$code.=<<___;
.Lcbc_dec_info:
.byte 9,0,0,0
.rva se_handler
.rva .Lcbc_dec_body,.Lcbc_dec_epilogue # HandlerData[]
.Lctr_enc_info:
.byte 9,0,0,0
.rva se_handler
.rva .Lctr_enc_body,.Lctr_enc_epilogue # HandlerData[]
.Lxts_enc_info:
.byte 9,0,0,0
.rva se_handler
.rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[]
.Lxts_dec_info:
.byte 9,0,0,0
.rva se_handler
.rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[]
___
}
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;