aesv8-armx.pl: add CTR implementation.

Submitted by: Ard Biesheuvel.
This commit is contained in:
Andy Polyakov 2014-05-29 22:45:35 +02:00
parent 46d889f33e
commit e09ea622bb

View file

@ -20,6 +20,7 @@
#
# CBC enc CBC dec
# Apple A7 2.39 1.20
# Cortex-A5x n/a n/a
# Build flavour. `shift` outside a sub consumes the first command-line
# argument; later code matches it against /64/ to choose between the
# 64-bit and the 32-bit variant of the emitted assembly.
$flavour = shift;
# Symbol-name prefix interpolated into the emitted labels, e.g.
# ${prefix}_ctr32_encrypt_blocks.
$prefix="AES";
@ -479,8 +480,8 @@ $code.=<<___;
aesd $dat1,q15
veor $ivec,$ivec,$dat0
veor $in0,$in0,$dat1
vld1.8 {$dat0},[$inp],$step
veor $in0,$in0,$dat1
vld1.8 {$dat1},[$inp],$step1
vst1.8 {$ivec},[$out],#16
veor $ivec,$in1,$rndlast
@ -622,6 +623,245 @@ $code.=<<___;
.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
___
}}}
{{{
# CTR-mode routine ${prefix}_ctr32_encrypt_blocks.  The instruction text is
# appended to $code using 64-bit register names; only the prologue and the
# epilogue are selected per $flavour.
#
# Arguments x0-x4, in order: input pointer, output pointer, number of
# 16-byte blocks (decremented by 2 for every pair of blocks processed),
# key-schedule pointer (the round count is read at byte offset 240) and
# pointer to the 16-byte counter block.
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
# Scalar work registers:
#   $rounds        round count from the key schedule, reduced by 8 in setup
#   $cnt           countdown for the round loops, reset from $rounds
#   $key_          moving pointer into the key schedule
#   $ctr           32-bit word at offset 12 of the counter block, bumped by
#                  one for every block
#   $tctr,$tctr1   byte-reversed copies of $ctr that are written into
#                  lane 3 of $dat0/$dat1
my ($rounds,$cnt,$key_,$ctr,$tctr,$tctr1)=("w5","w6","x7","w8","w9","w10");
# Vector registers q0-q7:
#   $dat0,$dat1    counter blocks being encrypted
#   $in0,$in1      input blocks; the result is formed in place and stored
#   $tmp0,$tmp1    cipher state for the last rounds in .Loop2x_ctr32; in
#                  the .Lctr32_128 path they instead hold two round keys
#                  loaded from $key_
#   $ivec          copy of the initial counter block, used as the template
#                  for every later counter block
#   $rndlast       last round key, XORed into the input ahead of the
#                  final aese
my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
# Aliases used by the single-block tail.  $tmp is not referenced anywhere
# in this block.
my ($dat,$tmp)=($dat0,$tmp0);
### q8-q15 preloaded key schedule
# q8/q9 start out as the first two round keys and are reloaded while the
# round loops walk the schedule; q10-q15 hold six of the last seven round
# keys, the seventh lives in $rndlast.

# Symbol declaration and entry label.
$code.=<<___;
.globl ${prefix}_ctr32_encrypt_blocks
.type ${prefix}_ctr32_encrypt_blocks,%function
.align 5
${prefix}_ctr32_encrypt_blocks:
___
# 64-bit prologue: push the x29/x30 pair and point x29 at it.
$code.=<<___ if ($flavour =~ /64/);
stp x29,x30,[sp,#-16]!
add x29,sp,#0
___
# 32-bit prologue: save r4-r10, lr and d8-d15, then fetch the fifth
# argument from the caller's stack (ip holds sp as it was on entry).
# NOTE(review): this relies on $ivp (x4) being rewritten to r4 by the
# 32-bit post-processing outside this block -- confirm.
$code.=<<___ if ($flavour !~ /64/);
mov ip,sp
stmdb sp!,{r4-r10,lr}
vstmdb sp!,{d8-d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
___
# Main body, shared by both flavours.
#   setup              loads the round count, the counter word and block,
#                      the first two round keys (q8-q9) and the last seven
#                      round keys (q10-q15, $rndlast).  The literal x5 in
#                      the `add` is the 64-bit name of $rounds (w5).
#   .Loop2x_ctr32      two blocks per pass when $rounds != 2 after setup;
#                      q8/q9 are streamed from $key_ and re-primed with the
#                      first two round keys before the next pass.
#   .Lctr32_128        taken when $rounds == 2 after setup (stored round
#   .Loop2x_ctr32_128  count 10); all round keys stay in registers, with
#                      $tmp0/$tmp1 holding the two keys at $key_.
#   .Lctr32_tail       one block; reached when fewer than two blocks remain.
# Both two-block loops set the flags with `subs $len,$len,#2` in the middle
# of the pass and consume them with `b.hs` at the end, so nothing in
# between may alter the condition flags.
#
# NOTE(review): the byte swap of $ctr is guarded by `#ifndef BIG_ENDIAN`.
# That name is not a compiler-predefined macro (toolchains predefine
# __ARMEB__ / __AARCH64EB__ for big-endian targets), so the guard only has
# an effect if the build defines BIG_ENDIAN itself -- confirm.
# NOTE(review): `b.lo .Lctr32_tail` is also taken when the block count is
# 0, and the tail always reads and writes one block; presumably callers
# never pass 0 -- confirm.
$code.=<<___;
ldr $rounds,[$key,#240]
ldr $ctr, [$ivp, #12]
vld1.32 {$dat0},[$ivp]
vld1.32 {q8-q9},[$key] // load key schedule...
sub $rounds,$rounds,#6
add $key_,$key,x5,lsl#4 // pointer to last 7 round keys
sub $rounds,$rounds,#2
vld1.32 {q10-q11},[$key_],#32
vld1.32 {q12-q13},[$key_],#32
vld1.32 {q14-q15},[$key_],#32
vld1.32 {$rndlast},[$key_]
add $key_,$key,#32
mov $cnt,$rounds
subs $len,$len,#2
b.lo .Lctr32_tail
#ifndef BIG_ENDIAN
rev $ctr, $ctr
#endif
vorr $dat1,$dat0,$dat0
add $ctr, $ctr, #1
vorr $ivec,$dat0,$dat0
rev $tctr1, $ctr
cmp $rounds,#2
vmov.32 ${dat1}[3],$tctr1
b.eq .Lctr32_128
.Loop2x_ctr32:
aese $dat0,q8
aese $dat1,q8
vld1.32 {q8},[$key_],#16
aesmc $dat0,$dat0
aesmc $dat1,$dat1
subs $cnt,$cnt,#2
aese $dat0,q9
aese $dat1,q9
vld1.32 {q9},[$key_],#16
aesmc $dat0,$dat0
aesmc $dat1,$dat1
b.gt .Loop2x_ctr32
aese $dat0,q8
aese $dat1,q8
aesmc $tmp0,$dat0
vorr $dat0,$ivec,$ivec
aesmc $tmp1,$dat1
vorr $dat1,$ivec,$ivec
aese $tmp0,q9
aese $tmp1,q9
vld1.8 {$in0},[$inp],#16
aesmc $tmp0,$tmp0
vld1.8 {$in1},[$inp],#16
aesmc $tmp1,$tmp1
add $ctr,$ctr,#1
aese $tmp0,q10
aese $tmp1,q10
rev $tctr,$ctr
aesmc $tmp0,$tmp0
aesmc $tmp1,$tmp1
add $ctr,$ctr,#1
aese $tmp0,q11
aese $tmp1,q11
veor $in0,$in0,$rndlast
rev $tctr1,$ctr
aesmc $tmp0,$tmp0
aesmc $tmp1,$tmp1
veor $in1,$in1,$rndlast
mov $key_,$key
aese $tmp0,q12
aese $tmp1,q12
subs $len,$len,#2
aesmc $tmp0,$tmp0
aesmc $tmp1,$tmp1
vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
aese $tmp0,q13
aese $tmp1,q13
aesmc $tmp0,$tmp0
aesmc $tmp1,$tmp1
vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
aese $tmp0,q14
aese $tmp1,q14
vmov.32 ${dat0}[3], $tctr
aesmc $tmp0,$tmp0
vmov.32 ${dat1}[3], $tctr1
aesmc $tmp1,$tmp1
aese $tmp0,q15
aese $tmp1,q15
mov $cnt,$rounds
veor $in0,$in0,$tmp0
veor $in1,$in1,$tmp1
vst1.8 {$in0},[$out],#16
vst1.8 {$in1},[$out],#16
b.hs .Loop2x_ctr32
adds $len,$len,#2
b.eq .Lctr32_done
b .Lctr32_tail
.Lctr32_128:
vld1.32 {$tmp0-$tmp1},[$key_]
.Loop2x_ctr32_128:
aese $dat0,q8
aese $dat1,q8
aesmc $dat0,$dat0
vld1.8 {$in0},[$inp],#16
aesmc $dat1,$dat1
vld1.8 {$in1},[$inp],#16
aese $dat0,q9
aese $dat1,q9
add $ctr,$ctr,#1
aesmc $dat0,$dat0
aesmc $dat1,$dat1
rev $tctr,$ctr
aese $dat0,$tmp0
aese $dat1,$tmp0
add $ctr,$ctr,#1
aesmc $dat0,$dat0
aesmc $dat1,$dat1
rev $tctr1,$ctr
aese $dat0,$tmp1
aese $dat1,$tmp1
subs $len,$len,#2
aesmc $dat0,$dat0
aesmc $dat1,$dat1
aese $dat0,q10
aese $dat1,q10
aesmc $dat0,$dat0
aesmc $dat1,$dat1
aese $dat0,q11
aese $dat1,q11
aesmc $dat0,$dat0
aesmc $dat1,$dat1
aese $dat0,q12
aese $dat1,q12
aesmc $dat0,$dat0
aesmc $dat1,$dat1
aese $dat0,q13
aese $dat1,q13
aesmc $dat0,$dat0
aesmc $dat1,$dat1
aese $dat0,q14
aese $dat1,q14
aesmc $dat0,$dat0
aesmc $dat1,$dat1
veor $in0,$in0,$rndlast
aese $dat0,q15
veor $in1,$in1,$rndlast
aese $dat1,q15
veor $in0,$in0,$dat0
vorr $dat0,$ivec,$ivec
veor $in1,$in1,$dat1
vorr $dat1,$ivec,$ivec
vst1.8 {$in0},[$out],#16
vmov.32 ${dat0}[3], $tctr
vst1.8 {$in1},[$out],#16
vmov.32 ${dat1}[3], $tctr1
b.hs .Loop2x_ctr32_128
adds $len,$len,#2
b.eq .Lctr32_done
.Lctr32_tail:
aese $dat,q8
vld1.32 {q8},[$key_],#16
aesmc $dat,$dat
subs $cnt,$cnt,#2
aese $dat,q9
vld1.32 {q9},[$key_],#16
aesmc $dat,$dat
b.gt .Lctr32_tail
aese $dat,q8
aesmc $dat,$dat
aese $dat,q9
aesmc $dat,$dat
vld1.8 {$in0},[$inp]
aese $dat,q10
aesmc $dat,$dat
aese $dat,q11
aesmc $dat,$dat
aese $dat,q12
aesmc $dat,$dat
aese $dat,q13
aesmc $dat,$dat
aese $dat,q14
aesmc $dat,$dat
veor $in0,$in0,$rndlast
aese $dat,q15
veor $in0,$in0,$dat
vst1.8 {$in0},[$out]
.Lctr32_done:
___
# 32-bit epilogue: restore d8-d15 and r4-r10, returning by popping the
# saved lr straight into pc.
$code.=<<___ if ($flavour !~ /64/);
vldmia sp!,{d8-d15}
ldmia sp!,{r4-r10,pc}
___
# 64-bit epilogue: reload x29 and release the 16-byte frame.  x30 is not
# reloaded; no instruction in the body above writes it.
$code.=<<___ if ($flavour =~ /64/);
ldr x29,[sp],#16
ret
___
# Symbol size for the assembler.
$code.=<<___;
.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
___
}}}
########################################
if ($flavour =~ /64/) { ######## 64-bit code
my %opcode = (
@ -691,6 +931,13 @@ if ($flavour =~ /64/) { ######## 64-bit code
sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+$3>>1,$3&1;
}
sub unvmov32 {
    # Rewrites the operand text of a quad-register lane move, "qN[L],src",
    # into the double-register form "vmov.32 dM[l],src".
    #
    # Each q register overlays two d registers (qN = d(2N), d(2N+1)), so
    # 32-bit lane L of qN is lane (L & 1) of d(2N + (L >> 1)).
    #
    # Parameter: the text following the "vmov.32" mnemonic.
    # Returns:   the rewritten instruction, or a false value when the
    #            operands are not in the qN[L],src form.
    my $arg=shift;

    # The parentheses around ($2>>1) are required: `>>` binds less tightly
    # than `+`, so without them the whole sum 2*$1+$2 would be halved and,
    # for example, q1[3] would be mapped to d2[1] instead of d3[1].
    $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
    sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3;
}
foreach(split("\n",$code)) {
s/\`([^\`]*)\`/eval($1)/geo;
@ -705,6 +952,7 @@ if ($flavour =~ /64/) { ######## 64-bit code
s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or
s/vtbl\.8\s+(.*)/unvtbl($1)/geo or
s/vdup\.32\s+(.*)/unvdup32($1)/geo or
s/vmov\.32\s+(.*)/unvmov32($1)/geo or
s/^(\s+)b\./$1b/o or
s/^(\s+)ret/$1bx\tlr/o;