Add "teaser" AES module for PowerISA 2.07.
"Teaser" means that it's not integrated yet and purpose of this commit is primarily informational, to exhibit design choices, such as how to handle alignment and endianness. In other words it's proof-of-concept code that EVP module will build upon.
This commit is contained in:
parent 7b06ac7593
commit f75faa16af

2 changed files with 755 additions and 0 deletions
  722  crypto/aes/asm/aesp8-ppc.pl  (Executable file)
   33  crypto/perlasm/ppc-xlate.pl

crypto/aes/asm/aesp8-ppc.pl (Executable file)
@@ -0,0 +1,722 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. It is also alignment-agnostic, and is
# guaranteed not to cause alignment exceptions. [One of the options was
# to use VSX loads and stores, which tolerate unaligned references, but
# even then the specification doesn't prohibit exceptions on page
# boundaries.]
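#
# [A minimal sketch of the unaligned-load idiom used below, with
# hypothetical operands; rP holds an arbitrary byte address:
#
#	li	rT,15
#	lvx	vA,0,rP		# load 16 bytes at rP & ~15
#	lvx	vB,rT,rP	# load next quadword (if rP unaligned)
#	lvsl	vP,0,rP		# permute vector from low address bits
#	vperm	vC,vA,vB,vP	# vC = 16 unaligned bytes at rP
#
# lvx ignores the low four address bits, so neither load can cross a
# page boundary; vperm then splices the two quadwords together. On
# little-endian flavours the permute is adjusted, as in the code
# below.]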

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
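
# The second command-line argument, if any, is passed through to
# ppc-xlate.pl as the output file; a hypothetical invocation would be
# something like "perl aesp8-ppc.pl linux64le aesp8-ppc.s", where the
# flavour selects the ABI and endianness.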

$FRAME=8*$SIZE_T;
$prefix="AES";

$sp="r1";
$vrsave="r12";

{{{
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
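# Entries tagged ?rev above are byte-reversed by the postprocessing
# loop at the bottom of this file when a little-endian flavour is
# selected; ?asis entries are emitted unchanged.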
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr		#vvvvv "distance between . and rcon"
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr	r11
	li	r0,0xfff
	$PUSH	r11,$LRSAVE($sp)
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
___
$code.=<<___ if ($LITTLE_ENDIAN);
	vspltisb	$mask,0x0f	# borrow $mask
	vxor	$key,$key,$mask		# adjust for byte swap
___
$code.=<<___;
	lvx	$rcon,0,$ptr
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256
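
# Each Loop128 iteration below produces one round key: the $mask
# permutation rotates the last key word and splats it across the
# vector (RotWord), vcipherlast supplies SubWord and xors in the
# round constant (the splat makes its ShiftRows step a no-op), and
# the vsldoi/vxor cascade folds the result into the other words.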

.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not a typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask
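
# For 192-bit keys the schedule advances in 24-byte steps, so the
# 16-byte round keys straddle the $in0/$in1 pair; the vsldoi shuffles
# through $stage below assemble each aligned round key before it is
# stored.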

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not a typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not a typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256
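
# The second half of each Loop256 iteration handles the AES-256
# schedule's extra step: every fourth key word gets SubWord without
# RotWord, hence the plain splat plus vsbox (SubBytes only) instead
# of the rotate-n-splat/vcipherlast combination.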

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	xor	r3,r3,r3		# return value
	mtspr	256,$vrsave
	stw	$rounds,0($out)

	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds
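
# vncipher consumes the same round keys as vcipher, only in reverse
# order, so the decryption schedule is simply the encryption schedule
# with round keys swapped end for end; the loop below performs that
# swap in place, one 32-bit word at a time.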

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
{{{
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_encrypt
.align	5
.${prefix}_encrypt:
	lwz	$rounds,240($key)
	li	r0,0x3f
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not a typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	`"vspltisb	v4,0x0f"	if ($LITTLE_ENDIAN)`
	?lvsl	v3,0,r11		# outperm
	`"vxor	v2,v2,v4"		if ($LITTLE_ENDIAN)`
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds
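
# The round count was halved above because each Loop_enc iteration
# applies two rounds, with round keys staged alternately in v1 and
# v2; the ?vperm re-aligns successive key pairs when the key
# schedule itself is unaligned.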

Loop_enc:
	?vperm	v2,v2,v1,v5
	vcipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	vcipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_enc

	?vperm	v2,v2,v1,v5
	vcipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	vcipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not a typo
	?vperm	v2,v1,v2,v3		# outmask
	`"vxor	v3,v3,v4"		if ($LITTLE_ENDIAN)`
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
.size	.${prefix}_encrypt,.-.${prefix}_encrypt

.globl	.${prefix}_decrypt
.align	5
.${prefix}_decrypt:
	lwz	$rounds,240($key)
	li	r0,0x3f
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not a typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	`"vspltisb	v4,0x0f"	if ($LITTLE_ENDIAN)`
	?lvsl	v3,0,r11		# outperm
	`"vxor	v2,v2,v4"		if ($LITTLE_ENDIAN)`
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_dec:
	?vperm	v2,v2,v1,v5
	vncipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	vncipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_dec

	?vperm	v2,v2,v1,v5
	vncipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	vncipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not a typo
	?vperm	v2,v1,v2,v3		# outmask
	`"vxor	v3,v3,v4"		if ($LITTLE_ENDIAN)`
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
.size	.${prefix}_decrypt,.-.${prefix}_decrypt
___
}}}
{{{
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$ivec,$tmp)=map("v$_",(0..4));
my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=map("v$_",(5..10));

$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	subic.	$len,$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	li	r0,0x7ff
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	`"vspltisb	$tmp,0x0f"	if ($LITTLE_ENDIAN)`

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	`"vxor	$inpperm,$inpperm,$tmp"	if ($LITTLE_ENDIAN)`
	vperm	$ivec,$ivec,$inptail,$inpperm

	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsl	$inpperm,0,$inp		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	`"vxor	$inpperm,$inpperm,$tmp"	if ($LITTLE_ENDIAN)`

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	`"vxor	$outperm,$outperm,$tmp"	if ($LITTLE_ENDIAN)`

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec
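
# CBC chaining is done around the cipher rounds: on the encrypt path
# $ivec is xored into the plaintext block up front and then replaced
# by the resulting ciphertext, while on the decrypt path the saved
# input block becomes the next $ivec and is xored in after the last
# round.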

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	sub.	$len,$len,$idx		# len -= 16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	sub.	$len,$len,$idx		# len -= 16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not a typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	`"vspltisb	$tmp,0x0f"	if ($LITTLE_ENDIAN)`
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	`"vxor	$outperm,$outperm,$tmp"	if ($LITTLE_ENDIAN)`
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes); last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/\?lvsr/lvsl/o or
	    s/\?lvsl/lvsr/o or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/\?([a-z]+)/$1/o;
	}
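
	# e.g. on little-endian flavours a "?lvsl" above is emitted as
	# lvsr (and vice versa), and a "?vperm" has its two source
	# operands swapped; on big-endian flavours the '?' prefix is
	# simply dropped.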

	print $_,"\n";
}

close STDOUT;

crypto/perlasm/ppc-xlate.pl
@@ -151,6 +151,39 @@ my $vmr = sub {
	"	vor	$vx,$vy,$vy";
};

# PowerISA 2.06 stuff
sub vsxmem_op {
    my ($f, $vrt, $ra, $rb, $op) = @_;
    "	.long	".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
}
# made-up unaligned memory reference AltiVec/VMX instructions
my $lvx_u	= sub {	vsxmem_op(@_, 844); };	# lxvd2x
my $stvx_u	= sub {	vsxmem_op(@_, 972); };	# stxvd2x
my $lvdx_u	= sub {	vsxmem_op(@_, 588); };	# lxsdx
my $stvdx_u	= sub {	vsxmem_op(@_, 716); };	# stxsdx
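
# Note the ($op*2+1) in vsxmem_op: the VSX extended opcode sits one
# bit above the least-significant bit, and that last bit (TX=1)
# selects VSRs 32-63, which overlay the VMX registers, so plain
# v0-v31 operands can be used with these "unaligned" mnemonics.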

# PowerISA 2.07 stuff
sub vcrypto_op {
    my ($f, $vrt, $vra, $vrb, $op) = @_;
    "	.long	".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
}
my $vcipher	= sub { vcrypto_op(@_, 1288); };
my $vcipherlast	= sub { vcrypto_op(@_, 1289); };
my $vncipher	= sub { vcrypto_op(@_, 1352); };
my $vncipherlast= sub { vcrypto_op(@_, 1353); };
my $vsbox	= sub { vcrypto_op(@_, 0, 1480); };
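
# A worked example of the encoding above: "vcipher v0,v0,v1" becomes
# .long 0x10000D08, i.e. (4<<26)|(0<<21)|(0<<16)|(1<<11)|1288.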
my $vshasigmad	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
my $vshasigmaw	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
my $vpmsumb	= sub { vcrypto_op(@_, 1032); };
my $vpmsumd	= sub { vcrypto_op(@_, 1224); };
my $vpmsumh	= sub { vcrypto_op(@_, 1096); };
my $vpmsumw	= sub { vcrypto_op(@_, 1160); };

my $mtsle	= sub {
    my ($f, $arg) = @_;
    "	.long	".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
};
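
# mtsle is X-form: primary opcode 31 with the extended opcode (147)
# occupying the field one bit above the least-significant bit, hence
# the 147*2.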

while($line=<>) {

    $line =~ s|[#!;].*$||;	# get rid of asm-style comments...