e_padlock-x86[_64].pl: protection against prefetch errata.
This commit is contained in:
parent
3231e42d72
commit
6c8ce3c2ff
2 changed files with 73 additions and 17 deletions
|
@ -37,6 +37,7 @@ require "x86asm.pl";
|
||||||
|
|
||||||
&asm_init($ARGV[0],$0);
|
&asm_init($ARGV[0],$0);
|
||||||
|
|
||||||
|
%PADLOCK_MARGIN=(ecb=>128, cbc=>64); # prefetch errata
|
||||||
$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
|
$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
|
||||||
|
|
||||||
$ctx="edx";
|
$ctx="edx";
|
||||||
|
@ -187,6 +188,10 @@ my ($mode,$opcode) = @_;
|
||||||
&movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter
|
&movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter
|
||||||
} else {
|
} else {
|
||||||
&xor ("ebx","ebx");
|
&xor ("ebx","ebx");
|
||||||
|
if ($PADLOCK_MARGIN{$mode}) {
|
||||||
|
&cmp ($len,$PADLOCK_MARGIN{$mode});
|
||||||
|
&jbe (&label("${mode}_short"));
|
||||||
|
}
|
||||||
&test (&DWP(0,$ctx),1<<5); # align bit in control word
|
&test (&DWP(0,$ctx),1<<5); # align bit in control word
|
||||||
&jnz (&label("${mode}_aligned"));
|
&jnz (&label("${mode}_aligned"));
|
||||||
&test ($out,0x0f);
|
&test ($out,0x0f);
|
||||||
|
@ -285,20 +290,39 @@ my ($mode,$opcode) = @_;
|
||||||
&mov ($chunk,$PADLOCK_CHUNK);
|
&mov ($chunk,$PADLOCK_CHUNK);
|
||||||
&jnz (&label("${mode}_loop"));
|
&jnz (&label("${mode}_loop"));
|
||||||
if ($mode ne "ctr32") {
|
if ($mode ne "ctr32") {
|
||||||
&test ($out,0x0f); # out_misaligned
|
&cmp ("esp","ebp");
|
||||||
&jz (&label("${mode}_done"));
|
&je (&label("${mode}_done"));
|
||||||
}
|
}
|
||||||
&mov ($len,"ebp");
|
&pxor ("xmm0","xmm0");
|
||||||
&mov ($out,"esp");
|
&lea ("eax",&DWP(0,"esp"));
|
||||||
&sub ($len,"esp");
|
&set_label("${mode}_bzero");
|
||||||
&xor ("eax","eax");
|
&movaps (&QWP(0,"eax"),"xmm0");
|
||||||
&shr ($len,2);
|
&lea ("eax",&DWP(16,"eax"));
|
||||||
&data_byte(0xf3,0xab); # rep stosl
|
&cmp ("ebp","eax");
|
||||||
|
&ja (&label("${mode}_bzero"));
|
||||||
|
|
||||||
&set_label("${mode}_done");
|
&set_label("${mode}_done");
|
||||||
&lea ("esp",&DWP(24,"ebp"));
|
&lea ("esp",&DWP(24,"ebp"));
|
||||||
if ($mode ne "ctr32") {
|
if ($mode ne "ctr32") {
|
||||||
&jmp (&label("${mode}_exit"));
|
&jmp (&label("${mode}_exit"));
|
||||||
|
|
||||||
|
&set_label("${mode}_short",16);
|
||||||
|
&xor ("eax","eax");
|
||||||
|
&lea ("ebp",&DWP(-24,"esp"));
|
||||||
|
&sub ("eax",$len);
|
||||||
|
&lea ("esp",&DWP(0,"eax","ebp"));
|
||||||
|
&and ("esp",-16);
|
||||||
|
&xor ($chunk,$chunk);
|
||||||
|
&set_label("${mode}_short_copy");
|
||||||
|
&movups ("xmm0",&QWP(0,$inp,$chunk));
|
||||||
|
&lea ($chunk,&DWP(16,$chunk));
|
||||||
|
&cmp ($len,$chunk);
|
||||||
|
&movaps (&QWP(-16,"esp",$chunk),"xmm0");
|
||||||
|
&ja (&label("${mode}_short_copy"));
|
||||||
|
&mov ($inp,"esp");
|
||||||
|
&mov ($chunk,$len);
|
||||||
|
&jmp (&label("${mode}_loop"));
|
||||||
|
|
||||||
&set_label("${mode}_aligned",16);
|
&set_label("${mode}_aligned",16);
|
||||||
&lea ("eax",&DWP(-16,$ctx)); # ivp
|
&lea ("eax",&DWP(-16,$ctx)); # ivp
|
||||||
&lea ("ebx",&DWP(16,$ctx)); # key
|
&lea ("ebx",&DWP(16,$ctx)); # key
|
||||||
|
|
|
@ -27,6 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";
|
||||||
|
|
||||||
$code=".text\n";
|
$code=".text\n";
|
||||||
|
|
||||||
|
%PADLOCK_MARGIN=(ecb=>128, cbc=>64, ctr32=>64); # prefetch errata
|
||||||
$PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20
|
$PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20
|
||||||
|
|
||||||
$ctx="%rdx";
|
$ctx="%rdx";
|
||||||
|
@ -284,6 +285,17 @@ padlock_${mode}_encrypt:
|
||||||
lea 16($ctx),$ctx # control word
|
lea 16($ctx),$ctx # control word
|
||||||
xor %eax,%eax
|
xor %eax,%eax
|
||||||
xor %ebx,%ebx
|
xor %ebx,%ebx
|
||||||
|
___
|
||||||
|
# Formally speaking, the correct condition is $len<=$margin and $inp+$margin
|
||||||
|
# crosses page boundary [and next page is unreadable]. But $inp can
|
||||||
|
# be unaligned in which case data can be copied to $out if latter is
|
||||||
|
# aligned, in which case $out+$margin has to be checked. Covering all
|
||||||
|
# cases appears more complicated than just copying short input...
|
||||||
|
$code.=<<___ if ($PADLOCK_MARGIN{$mode});
|
||||||
|
cmp \$$PADLOCK_MARGIN{$mode},$len
|
||||||
|
jbe .L${mode}_short
|
||||||
|
___
|
||||||
|
$code.=<<___;
|
||||||
testl \$`1<<5`,($ctx) # align bit in control word
|
testl \$`1<<5`,($ctx) # align bit in control word
|
||||||
jnz .L${mode}_aligned
|
jnz .L${mode}_aligned
|
||||||
test \$0x0f,$out
|
test \$0x0f,$out
|
||||||
|
@ -305,6 +317,7 @@ padlock_${mode}_encrypt:
|
||||||
lea (%rax,%rbp),%rsp
|
lea (%rax,%rbp),%rsp
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($mode eq "ctr32");
|
$code.=<<___ if ($mode eq "ctr32");
|
||||||
|
.L${mode}_reenter:
|
||||||
mov -4($ctx),%eax # pull 32-bit counter
|
mov -4($ctx),%eax # pull 32-bit counter
|
||||||
bswap %eax
|
bswap %eax
|
||||||
neg %eax
|
neg %eax
|
||||||
|
@ -373,19 +386,38 @@ $code.=<<___;
|
||||||
mov \$$PADLOCK_CHUNK,$chunk
|
mov \$$PADLOCK_CHUNK,$chunk
|
||||||
jnz .L${mode}_loop
|
jnz .L${mode}_loop
|
||||||
|
|
||||||
test \$0x0f,$out
|
cmp %rsp,%rbp
|
||||||
jz .L${mode}_done
|
je .L${mode}_done
|
||||||
|
|
||||||
|
pxor %xmm0,%xmm0
|
||||||
|
lea (%rsp),%rax
|
||||||
|
.L${mode}_bzero:
|
||||||
|
movaps %xmm0,(%rax)
|
||||||
|
lea 16(%rax),%rax
|
||||||
|
cmp %rax,%rbp
|
||||||
|
ja .L${mode}_bzero
|
||||||
|
|
||||||
mov %rbp,$len
|
|
||||||
mov %rsp,$out
|
|
||||||
sub %rsp,$len
|
|
||||||
xor %rax,%rax
|
|
||||||
shr \$3,$len
|
|
||||||
.byte 0xf3,0x48,0xab # rep stosq
|
|
||||||
.L${mode}_done:
|
.L${mode}_done:
|
||||||
lea (%rbp),%rsp
|
lea (%rbp),%rsp
|
||||||
jmp .L${mode}_exit
|
jmp .L${mode}_exit
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($PADLOCK_MARGIN{$mode});
|
||||||
|
.align 16
|
||||||
|
.L${mode}_short:
|
||||||
|
mov %rsp,%rbp
|
||||||
|
sub $len,%rsp
|
||||||
|
xor $chunk,$chunk
|
||||||
|
.L${mode}_short_copy:
|
||||||
|
movups ($inp,$chunk),%xmm0
|
||||||
|
lea 16($chunk),$chunk
|
||||||
|
cmp $chunk,$len
|
||||||
|
movaps %xmm0,-16(%rsp,$chunk)
|
||||||
|
ja .L${mode}_short_copy
|
||||||
|
mov %rsp,$inp
|
||||||
|
mov $len,$chunk
|
||||||
|
jmp .L${mode}_`${mode} eq "ctr32"?"reenter":"loop"`
|
||||||
|
___
|
||||||
|
$code.=<<___;
|
||||||
.align 16
|
.align 16
|
||||||
.L${mode}_aligned:
|
.L${mode}_aligned:
|
||||||
___
|
___
|
||||||
|
|
Loading…
Reference in a new issue