e_padlock-x86[_64].pl: protection against prefetch errata.

Andy Polyakov 2011-10-11 21:07:53 +00:00
parent 3231e42d72
commit 6c8ce3c2ff
2 changed files with 73 additions and 17 deletions
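The PadLock engine prefetches input data some distance ahead of the pointer it is given, so a buffer that ends within that margin of an unreadable page can trigger a fault (the "prefetch errata"). The fix below bounces such short inputs through an aligned on-stack buffer before the PadLock instruction runs. A minimal plain-Perl sketch of the dispatch being added (illustrative only; takes_short_path is a hypothetical helper, the real test is the cmp/jbe pair emitted as assembly):

#!/usr/bin/env perl
use strict; use warnings;

# Margins mirror the patch: ecb=>128, cbc=>64 (x86_64 adds ctr32=>64).
my %PADLOCK_MARGIN = (ecb => 128, cbc => 64);

# Hypothetical helper modelling the cmp/jbe pair: true when execution
# should take .L${mode}_short and bounce the input through the stack.
sub takes_short_path {
    my ($mode, $len) = @_;
    return exists $PADLOCK_MARGIN{$mode} && $len <= $PADLOCK_MARGIN{$mode};
}

printf "%s(%d): %s\n", @$_, takes_short_path(@$_) ? "short (bounce) path" : "direct path"
    for ([cbc => 48], [cbc => 4096], [ecb => 128]);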

e_padlock-x86.pl

@@ -37,6 +37,7 @@ require "x86asm.pl";
&asm_init($ARGV[0],$0);
%PADLOCK_MARGIN=(ecb=>128, cbc=>64); # prefetch errata
$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
$ctx="edx";
@@ -187,6 +188,10 @@ my ($mode,$opcode) = @_;
&movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter
} else {
&xor ("ebx","ebx");
if ($PADLOCK_MARGIN{$mode}) {
&cmp ($len,$PADLOCK_MARGIN{$mode});
&jbe (&label("${mode}_short"));
}
&test (&DWP(0,$ctx),1<<5); # align bit in control word
&jnz (&label("${mode}_aligned"));
&test ($out,0x0f);
@@ -285,20 +290,39 @@ my ($mode,$opcode) = @_;
&mov ($chunk,$PADLOCK_CHUNK);
&jnz (&label("${mode}_loop"));
if ($mode ne "ctr32") {
&test ($out,0x0f); # out_misaligned
&jz (&label("${mode}_done"));
&cmp ("esp","ebp");
&je (&label("${mode}_done"));
}
&mov ($len,"ebp");
&mov ($out,"esp");
&sub ($len,"esp");
&xor ("eax","eax");
&shr ($len,2);
&data_byte(0xf3,0xab); # rep stosl
&pxor ("xmm0","xmm0"); # zero xmm0 for the stack-scrub loop
&lea ("eax",&DWP(0,"esp"));
&set_label("${mode}_bzero");
&movaps (&QWP(0,"eax"),"xmm0");
&lea ("eax",&DWP(16,"eax"));
&cmp ("ebp","eax");
&ja (&label("${mode}_bzero"));
&set_label("${mode}_done");
&lea ("esp",&DWP(24,"ebp"));
if ($mode ne "ctr32") {
&jmp (&label("${mode}_exit"));
&set_label("${mode}_short",16);
&xor ("eax","eax");
&lea ("ebp",&DWP(-24,"esp"));
&sub ("eax",$len);
&lea ("esp",&DWP(0,"eax","ebp"));
&and ("esp",-16);
&xor ($chunk,$chunk);
&set_label("${mode}_short_copy");
&movups ("xmm0",&QWP(0,$inp,$chunk)); # load possibly unaligned input
&lea ($chunk,&DWP(16,$chunk));
&cmp ($len,$chunk);
&movaps (&QWP(-16,"esp",$chunk),"xmm0");
&ja (&label("${mode}_short_copy"));
&mov ($inp,"esp");
&mov ($chunk,$len);
&jmp (&label("${mode}_loop"));
&set_label("${mode}_aligned",16);
&lea ("eax",&DWP(-16,$ctx)); # ivp
&lea ("ebx",&DWP(16,$ctx)); # key

e_padlock-x86_64.pl

@@ -27,6 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";
$code=".text\n";
%PADLOCK_MARGIN=(ecb=>128, cbc=>64, ctr32=>64); # prefetch errata
$PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20
$ctx="%rdx";
@@ -284,6 +285,17 @@ padlock_${mode}_encrypt:
lea 16($ctx),$ctx # control word
xor %eax,%eax
xor %ebx,%ebx
___
# Formally speaking the correct condition is $len<=$margin and $inp+$margin
# crossing a page boundary [with the next page unreadable]. But $inp can
# be unaligned, in which case data can be copied to $out if the latter is
# aligned, and then $out+$margin has to be checked instead. Covering all
# cases appears more complicated than just copying short input...
$code.=<<___ if ($PADLOCK_MARGIN{$mode});
cmp \$$PADLOCK_MARGIN{$mode},$len
jbe .L${mode}_short
___
$code.=<<___;
testl \$`1<<5`,($ctx) # align bit in control word
jnz .L${mode}_aligned
test \$0x0f,$out
@@ -305,6 +317,7 @@ padlock_${mode}_encrypt:
lea (%rax,%rbp),%rsp
___
$code.=<<___ if ($mode eq "ctr32");
.L${mode}_reenter:
mov -4($ctx),%eax # pull 32-bit counter
bswap %eax
neg %eax
@@ -373,19 +386,38 @@ $code.=<<___;
mov \$$PADLOCK_CHUNK,$chunk
jnz .L${mode}_loop
test \$0x0f,$out
jz .L${mode}_done
cmp %rsp,%rbp
je .L${mode}_done
pxor %xmm0,%xmm0 # zero xmm0 for the stack-scrub loop
lea (%rsp),%rax
.L${mode}_bzero:
movaps %xmm0,(%rax)
lea 16(%rax),%rax
cmp %rax,%rbp
ja .L${mode}_bzero
mov %rbp,$len
mov %rsp,$out
sub %rsp,$len
xor %rax,%rax
shr \$3,$len
.byte 0xf3,0x48,0xab # rep stosq
.L${mode}_done:
lea (%rbp),%rsp
jmp .L${mode}_exit
___
$code.=<<___ if ($PADLOCK_MARGIN{$mode});
.align 16
.L${mode}_short:
mov %rsp,%rbp
sub $len,%rsp
xor $chunk,$chunk
.L${mode}_short_copy:
movups ($inp,$chunk),%xmm0 # load possibly unaligned input
lea 16($chunk),$chunk
cmp $chunk,$len
movaps %xmm0,-16(%rsp,$chunk)
ja .L${mode}_short_copy
mov %rsp,$inp
mov $len,$chunk
jmp .L${mode}_`${mode} eq "ctr32"?"reenter":"loop"`
___
$code.=<<___;
.align 16
.L${mode}_aligned:
___
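Both files also retire the old rep stos cleanup in favour of an SSE loop: pxor clears %xmm0 and the .L${mode}_bzero loop stores it across the used stack area 16 bytes at a time, wiping temporary data before the frame is released. An illustrative plain-Perl analogue (scrub is hypothetical; buffer length assumed to be a multiple of 16, as the 16-byte-aligned frame guarantees):

# Illustrative analogue of .L${mode}_bzero: overwrite the scratch area
# with zeros, 16 bytes per iteration (movaps %xmm0,(%rax) in the asm).
sub scrub {
    my ($bufref) = @_;
    for (my $p = 0; $p < length $$bufref; $p += 16) {
        substr($$bufref, $p, 16) = "\0" x 16;
    }
}

my $frame = "secret material!" . ("x" x 48);   # 64 bytes, multiple of 16
scrub(\$frame);                                # $frame is now all NUL bytes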