openssl/crypto/aes/asm/aes-s390x.pl
2007-07-23 20:38:57 +00:00

1071 lines
27 KiB
Raku

#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# AES for s390x.
# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and issued instruction [in this case
# load value from memory is critical] has to complete before execution
# flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support. It's basically a "teaser," as
# it can and should be improved in several ways. Most notably support
# for CBC is not utilized, nor multiple blocks are ever processed.
# Then software key schedule can be postponed till hardware support
# detection... Performance improvement over assembler is reportedly
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
# support is implemented.
# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.
$softonly=0; # allow hardware support
$t1="%r0";
$t2="%r1";
$t3="%r2"; $inp="%r2";
$out="%r3"; $mask="%r3"; $bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";
sub _data_word()
{ my $i;
while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
$code=<<___;
.text
.type AES_Te,\@object
.align 64
AES_Te:
___
&_data_word(
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.size AES_Te,.-AES_Te
# void AES_encrypt(const unsigned char *inp, unsigned char *out,
# const AES_KEY *key) {
.globl AES_encrypt
.type AES_encrypt,\@function
AES_encrypt:
stg $ra,112($sp)
___
$code.=<<___ if (!$softonly);
lghi %r0,10
c %r0,240($key)
jne .Lesoft
lghi %r0,0 # query capability vector
la %r1,16($sp)
.long 0xb92e0042 # km %r4,%r2
lg %r0,16($sp)
tmhl %r0,`0x8000>>2`
jz .Lesoft128
lghi %r0,`0x00|0x12` # encrypt AES-128
la %r1,0($key)
#la %r2,0($inp)
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
bcr 8,%r14 # return if done
la $out,0(%r4) # restore arguments
la $key,0(%r1)
.Lesoft128:
lghi %r0,0
c %r0,236($key)
je .Lesoft
stmg $inp,$key,16($sp)
la $inp,0($key)
lghi $bits,128
bras $ra,.Lekey_internal # postponed key schedule setup
lmg $inp,$key,16($sp)
.Lesoft:
___
$code.=<<___;
stmg %r3,%r13,24($sp)
larl $tbl,AES_Te
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
llill $mask,`0xff<<3`
bras $ra,_s390x_AES_encrypt
lg $out,24($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lmg %r6,$ra,48($sp)
br $ra
.size AES_encrypt,.-AES_encrypt
.type _s390x_AES_encrypt,\@function
.align 16
_s390x_AES_encrypt:
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
aghi $rounds,-1
.Lenc_loop:
sllg $i1,$s0,`0+3`
srlg $i2,$s0,`8-3`
srlg $i3,$s0,`16-3`
srl $s0,`24-3`
nr $s0,$mask
ngr $i1,$mask
nr $i2,$mask
nr $i3,$mask
l $s0,0($s0,$tbl) # Te0[s0>>24]
l $t1,1($i1,$tbl) # Te3[s0>>0]
l $t2,2($i2,$tbl) # Te2[s0>>8]
l $t3,3($i3,$tbl) # Te1[s0>>16]
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
ngr $i2,$mask
nr $i3,$mask
x $s0,3($i1,$tbl) # Te1[s1>>16]
l $s1,0($s1,$tbl) # Te0[s1>>24]
x $t2,1($i2,$tbl) # Te3[s1>>0]
x $t3,2($i3,$tbl) # Te2[s1>>8]
xr $s1,$t1
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
nr $i1,$mask
nr $i2,$mask
nr $s2,$mask
ngr $i3,$mask
x $s0,2($i1,$tbl) # Te2[s2>>8]
x $s1,3($i2,$tbl) # Te1[s2>>16]
l $s2,0($s2,$tbl) # Te0[s2>>24]
x $t3,1($i3,$tbl) # Te3[s2>>0]
xr $s2,$t2
sllg $i1,$s3,`0+3` # i0
srlg $i2,$s3,`8-3` # i1
srlg $i3,$s3,`16-3` # i2
srl $s3,`24-3`
ngr $i1,$mask
nr $i2,$mask
nr $i3,$mask
nr $s3,$mask
x $s0,1($i1,$tbl) # Te3[s3>>0]
x $s1,2($i2,$tbl) # Te2[s3>>8]
x $s2,3($i3,$tbl) # Te1[s3>>16]
l $s3,0($s3,$tbl) # Te0[s3>>24]
xr $s3,$t3
la $key,16($key)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
brct $rounds,.Lenc_loop
sllg $i1,$s0,`0+3`
srlg $i2,$s0,`8-3`
srlg $i3,$s0,`16-3`
srl $s0,`24-3`
nr $s0,$mask
ngr $i1,$mask
nr $i2,$mask
nr $i3,$mask
llgc $s0,2($s0,$tbl) # Te4[s0>>24]
llgc $t1,2($i1,$tbl) # Te4[s0>>0]
llgc $t2,2($i2,$tbl) # Te4[s0>>8]
llgc $t3,2($i3,$tbl) # Te4[s0>>16]
sll $s0,24
sll $t2,8
sll $t3,16
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
ngr $i2,$mask
nr $i3,$mask
llgc $i1,2($i1,$tbl) # Te4[s1>>16]
llgc $s1,2($s1,$tbl) # Te4[s1>>24]
llgc $i2,2($i2,$tbl) # Te4[s1>>0]
llgc $i3,2($i3,$tbl) # Te4[s1>>8]
sll $i1,16
sll $s1,24
sll $i3,8
or $s0,$i1
or $s1,$t1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
nr $i1,$mask
nr $i2,$mask
nr $s2,$mask
ngr $i3,$mask
llgc $i1,2($i1,$tbl) # Te4[s2>>8]
llgc $i2,2($i2,$tbl) # Te4[s2>>16]
llgc $s2,2($s2,$tbl) # Te4[s2>>24]
llgc $i3,2($i3,$tbl) # Te4[s2>>0]
sll $i1,8
sll $i2,16
sll $s2,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $t3,$i3
sllg $i1,$s3,`0+3` # i0
srlg $i2,$s3,`8-3` # i1
srlg $i3,$s3,`16-3` # i2
srl $s3,`24-3`
ngr $i1,$mask
nr $i2,$mask
nr $i3,$mask
nr $s3,$mask
llgc $i1,2($i1,$tbl) # Te4[s3>>0]
llgc $i2,2($i2,$tbl) # Te4[s3>>8]
llgc $i3,2($i3,$tbl) # Te4[s3>>16]
llgc $s3,2($s3,$tbl) # Te4[s3>>24]
sll $i2,8
sll $i3,16
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$i3
or $s3,$t3
x $s0,16($key)
x $s1,20($key)
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
___
$code.=<<___;
.type AES_Td,\@object
.align 64
AES_Td:
___
&_data_word(
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
# Td4[256]
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size AES_Td,.-AES_Td
# void AES_decrypt(const unsigned char *inp, unsigned char *out,
# const AES_KEY *key) {
.globl AES_decrypt
.type AES_decrypt,\@function
AES_decrypt:
stg $ra,112($sp)
___
$code.=<<___ if (!$softonly);
lghi %r0,10
c %r0,240($key)
jne .Ldsoft
lghi %r0,0 # query capability vector
la %r1,16($sp)
.long 0xb92e0042 # km %r4,%r2
lg %r0,16($sp)
tmhl %r0,`0x8000>>2`
jz .Ldsoft128
lghi %r0,`0x80|0x12` # decrypt AES-128
la %r1,160($key)
#la %r2,0($inp)
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
bcr 8,%r14 # return if done
la $out,0(%r4) # restore arguments
lghi $key,-160
la $key,0($key,%r1)
.Ldsoft128:
lghi %r0,0
c %r0,236($key)
je .Ldsoft
stmg $inp,$key,16($sp)
la $inp,160($key)
lghi $bits,128
bras $ra,.Ldkey_internal # postponed key schedule setup
lmg $inp,$key,16($sp)
.Ldsoft:
___
$code.=<<___;
stmg %r3,%r13,24($sp)
larl $tbl,AES_Td
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
llill $mask,`0xff<<3`
bras $ra,_s390x_AES_decrypt
lg $out,24($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lmg %r6,$ra,48($sp)
br $ra
.size AES_decrypt,.-AES_decrypt
.type _s390x_AES_decrypt,\@function
.align 16
_s390x_AES_decrypt:
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
aghi $rounds,-1
.Ldec_loop:
srlg $i1,$s0,`16-3`
srlg $i2,$s0,`8-3`
sllg $i3,$s0,`0+3`
srl $s0,`24-3`
nr $s0,$mask
nr $i1,$mask
nr $i2,$mask
ngr $i3,$mask
l $s0,0($s0,$tbl) # Td0[s0>>24]
l $t1,3($i1,$tbl) # Td1[s0>>16]
l $t2,2($i2,$tbl) # Td2[s0>>8]
l $t3,1($i3,$tbl) # Td3[s0>>0]
sllg $i1,$s1,`0+3` # i0
srlg $i2,$s1,`16-3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
ngr $i1,$mask
nr $s1,$mask
nr $i2,$mask
nr $i3,$mask
x $s0,1($i1,$tbl) # Td3[s1>>0]
l $s1,0($s1,$tbl) # Td0[s1>>24]
x $t2,3($i2,$tbl) # Td1[s1>>16]
x $t3,2($i3,$tbl) # Td2[s1>>8]
xr $s1,$t1
srlg $i1,$s2,`8-3` # i0
sllg $i2,$s2,`0+3` # i1
srlg $i3,$s2,`16-3`
srl $s2,`24-3`
nr $i1,$mask
ngr $i2,$mask
nr $s2,$mask
nr $i3,$mask
x $s0,2($i1,$tbl) # Td2[s2>>8]
x $s1,1($i2,$tbl) # Td3[s2>>0]
l $s2,0($s2,$tbl) # Td0[s2>>24]
x $t3,3($i3,$tbl) # Td1[s2>>16]
xr $s2,$t2
srlg $i1,$s3,`16-3` # i0
srlg $i2,$s3,`8-3` # i1
sllg $i3,$s3,`0+3` # i2
srl $s3,`24-3`
nr $i1,$mask
nr $i2,$mask
ngr $i3,$mask
nr $s3,$mask
x $s0,3($i1,$tbl) # Td1[s3>>16]
x $s1,2($i2,$tbl) # Td2[s3>>8]
x $s2,1($i3,$tbl) # Td3[s3>>0]
l $s3,0($s3,$tbl) # Td0[s3>>24]
xr $s3,$t3
la $key,16($key)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
brct $rounds,.Ldec_loop
l $t1,`2048+0`($tbl) # prefetch Td4
l $t2,`2048+32`($tbl)
l $t3,`2048+64`($tbl)
l $i1,`2048+96`($tbl)
l $i2,`2048+128`($tbl)
l $i3,`2048+160`($tbl)
l $t1,`2048+192`($tbl)
l $t2,`2048+224`($tbl)
llill $mask,0xff
srlg $i3,$s0,24 # i0
srlg $i1,$s0,16
srlg $i2,$s0,8
nr $s0,$mask # i3
nr $i1,$mask
nr $i2,$mask
llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
llgc $t1,2048($i1,$tbl) # Td4[s0>>16]
llgc $t2,2048($i2,$tbl) # Td4[s0>>8]
llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
sllg $s0,$i3,24
sll $t1,16
sll $t2,8
srlg $i1,$s1,24
srlg $i2,$s1,16
srlg $i3,$s1,8
nr $s1,$mask # i0
nr $i2,$mask
nr $i3,$mask
llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
llgc $i3,2048($i3,$tbl) # Td4[s1>>8]
sll $i1,24
sll $i2,16
sll $i3,8
or $s0,$s1
or $t1,$i1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,8 # i0
srlg $i2,$s2,24
srlg $i3,$s2,16
nr $s2,$mask # i1
nr $i1,$mask
nr $i3,$mask
llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
sll $i1,8
sll $i2,24
sll $i3,16
or $s0,$i1
or $s1,$t1
or $t2,$i2
or $t3,$i3
srlg $i1,$s3,16 # i0
srlg $i2,$s3,8 # i1
srlg $i3,$s3,24
nr $s3,$mask # i2
nr $i1,$mask
nr $i2,$mask
llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
sll $i1,16
sll $i2,8
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $s3,$t3
x $s0,16($key)
x $s1,20($key)
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
# void AES_set_encrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_encrypt_key
.type AES_set_encrypt_key,\@function
.align 16
AES_set_encrypt_key:
lghi $t1,0
clgr $inp,$t1
je .Lminus1
clgr $key,$t1
je .Lminus1
lghi $t1,128
clr $bits,$t1
je .Lproceed128
lghi $t1,192
clr $bits,$t1
je .Lekey_internal
lghi $t1,256
clr $bits,$t1
je .Lekey_internal
lghi %r2,-2
br %r14
.align 4
.Lproceed128:
___
$code.=<<___ if (!$softonly);
lghi %r0,0 # query capability vector
la %r1,16($sp)
.long 0xb92e0042 # km %r4,%r2
lg %r0,16($sp)
tmhl %r0,`0x8000>>2`
jz .Lekey_internal
lmg $t1,$t2,0($inp) # just copy 128 bits...
stmg $t1,$t2,0($key)
lghi $t1,10
st $t1,236($key) # ... postpone key setup
st $t1,240($key)
lghi %r2,0
br %r14
___
$code.=<<___;
.align 16
.Lekey_internal:
stmg %r6,%r13,48($sp) # all non-volatile regs
larl $tbl,AES_Te+2048
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
st $s0,0($key)
st $s1,4($key)
st $s2,8($key)
st $s3,12($key)
lghi $t1,128
cr $bits,$t1
jne .Lnot128
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,10
st $t3,236($key) # mark as set up
st $rounds,240($key)
.align 8
.L128_loop:
llgfr $t2,$s3 # temp=rk[3]
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
nr $t2,$mask
nr $i1,$mask
nr $i2,$mask
la $t2,0($t2,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
icm $t2,1,0($i3) # Te4[rk[3]>>24]
x $t2,256($t3,$tbl) # rcon[i]
xr $s0,$t2 # rk[4]=rk[0]^...
xr $s1,$s0 # rk[5]=rk[1]^rk[4]
xr $s2,$s1 # rk[6]=rk[2]^rk[5]
xr $s3,$s2 # rk[7]=rk[3]^rk[6]
st $s0,16($key)
st $s1,20($key)
st $s2,24($key)
st $s3,28($key)
la $key,16($key) # key+=4
la $t3,4($t3) # i++
brct $rounds,.L128_loop
lghi %r2,0
lmg %r6,%r13,48($sp)
br $ra
.align 4
.Lnot128:
llgf $t1,16($inp)
llgf $t2,20($inp)
st $t1,16($key)
st $t2,20($key)
lghi $t1,192
cr $bits,$t1
jne .Lnot192
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,12
st $rounds,240($key)
lghi $rounds,8
.align 8
.L192_loop:
srlg $i1,$t2,8
srlg $i2,$t2,16
srlg $i3,$t2,24
nr $t2,$mask
nr $i1,$mask
nr $i2,$mask
la $t2,0($t2,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t2,2,0($t2) # Te4[rk[5]>>0]<<8
icm $t2,4,0($i1) # Te4[rk[5]>>8]<<16
icm $t2,8,0($i2) # Te4[rk[5]>>16]<<24
icm $t2,1,0($i3) # Te4[rk[5]>>24]
x $t2,256($t3,$tbl) # rcon[i]
xr $s0,$t2 # rk[6]=rk[0]^...
xr $s1,$s0 # rk[7]=rk[1]^rk[6]
xr $s2,$s1 # rk[8]=rk[2]^rk[7]
xr $s3,$s2 # rk[9]=rk[3]^rk[8]
st $s0,24($key)
st $s1,28($key)
st $s2,32($key)
st $s3,36($key)
brct $rounds,.L192_continue
lghi %r2,0
lmg %r6,%r13,48($sp)
br $ra
.align 4
.L192_continue:
lgr $t2,$s3
x $t2,16($key) # rk[10]=rk[4]^rk[9]
st $t2,40($key)
x $t2,20($key) # rk[11]=rk[5]^rk[10]
st $t2,44($key)
la $key,24($key) # key+=6
la $t3,4($t3) # i++
j .L192_loop
.align 4
.Lnot192:
llgf $t1,24($inp)
llgf $t2,28($inp)
st $t1,24($key)
st $t2,28($key)
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,14
st $rounds,240($key)
lghi $rounds,7
.align 8
.L256_loop:
srlg $i1,$t2,8
srlg $i2,$t2,16
srlg $i3,$t2,24
nr $t2,$mask
nr $i1,$mask
nr $i2,$mask
la $t2,0($t2,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t2,2,0($t2) # Te4[rk[7]>>0]<<8
icm $t2,4,0($i1) # Te4[rk[7]>>8]<<16
icm $t2,8,0($i2) # Te4[rk[7]>>16]<<24
icm $t2,1,0($i3) # Te4[rk[7]>>24]
x $t2,256($t3,$tbl) # rcon[i]
xr $s0,$t2 # rk[8]=rk[0]^...
xr $s1,$s0 # rk[9]=rk[1]^rk[8]
xr $s2,$s1 # rk[10]=rk[2]^rk[9]
xr $s3,$s2 # rk[11]=rk[3]^rk[10]
st $s0,32($key)
st $s1,36($key)
st $s2,40($key)
st $s3,44($key)
brct $rounds,.L256_continue
lghi %r2,0
lmg %r6,%r13,48($sp)
br $ra
.align 4
.L256_continue:
lgr $t2,$s3 # temp=rk[11]
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
nr $t2,$mask
nr $i1,$mask
nr $i2,$mask
la $t2,0($t2,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
llgc $t2,0($t2) # Te4[rk[11]>>0]
icm $t2,2,0($i1) # Te4[rk[11]>>8]<<8
icm $t2,4,0($i2) # Te4[rk[11]>>16]<<16
icm $t2,8,0($i3) # Te4[rk[11]>>24]<<24
x $t2,16($key) # rk[12]=rk[4]^...
st $t2,48($key)
x $t2,20($key) # rk[13]=rk[5]^rk[12]
st $t2,52($key)
x $t2,24($key) # rk[14]=rk[6]^rk[13]
st $t2,56($key)
x $t2,28($key) # rk[15]=rk[7]^rk[14]
st $t2,60($key)
la $key,32($key) # key+=8
la $t3,4($t3) # i++
j .L256_loop
.align 4
.Lminus1:
lghi %r2,-1
br %r14
.size AES_set_encrypt_key,.-AES_set_encrypt_key
# void AES_set_decrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_decrypt_key
.type AES_set_decrypt_key,\@function
.align 16
AES_set_decrypt_key:
stg $key,32($sp) # I rely on AES_set_encrypt_key to
stg $ra,112($sp) # save non-volatile registers!
bras $ra,AES_set_encrypt_key
lg $key,32($sp)
lg $ra,112($sp)
ltgr %r2,%r2
bnzr $ra
___
$code.=<<___ if (!$softonly);
lghi $t1,10
c $t1,240($key)
jne .Lgo
lghi $t1,0
c $t1,236($key)
je .Lgo
lmg $t1,$t2,0($key) # just copy 128 bits otherwise
stmg $t1,$t2,160($key)
lghi %r2,0
br $ra
.align 16
.Ldkey_internal:
stg $key,32($sp)
stg $ra,40($sp)
bras $ra,.Lekey_internal
lg $key,32($sp)
lg $ra,40($sp)
___
$code.=<<___;
.Lgo: llgf $rounds,240($key)
la $i1,0($key)
sllg $i2,$rounds,4
la $i2,0($i2,$key)
srl $rounds,1
.align 8
.Linv: lmg $s0,$s1,0($i1)
lmg $s2,$s3,0($i2)
stmg $s0,$s1,0($i2)
stmg $s2,$s3,0($i1)
aghi $i1,16
aghi $i2,-16
brct $rounds,.Linv
___
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
$code.=<<___;
llgf $rounds,240($key)
aghi $rounds,-1
sll $rounds,2 # (rounds-1)*4
llilh $mask80,0x8080
oill $mask80,0x8080
llilh $mask1b,0x1b1b
oill $mask1b,0x1b1b
llilh $maskfe,0xfefe
oill $maskfe,0xfefe
.align 8
.Lmix: l $s0,16($key) # tp1
lr $s1,$s0
ngr $s1,$mask80
srlg $t1,$s1,7
slr $s1,$t1
nr $s1,$mask1b
sllg $t1,$s0,1
nr $t1,$maskfe
xr $s1,$t1 # tp2
lr $s2,$s1
ngr $s2,$mask80
srlg $t1,$s2,7
slr $s2,$t1
nr $s2,$mask1b
sllg $t1,$s1,1
nr $t1,$maskfe
xr $s2,$t1 # tp4
lr $s3,$s2
ngr $s3,$mask80
srlg $t1,$s3,7
slr $s3,$t1
nr $s3,$mask1b
sllg $t1,$s2,1
nr $t1,$maskfe
xr $s3,$t1 # tp8
xr $s1,$s0 # tp2^tp1
xr $s2,$s0 # tp4^tp1
rll $s0,$s0,24 # = ROTATE(tp1,8)
xr $s0,$s1 # ^=tp2^tp1
xr $s0,$s2 # ^=tp4^tp1
xr $s0,$s3 # ^= tp8[^(tp4^tp1)^(tp2^tp1)=tp4^tp2]
xr $s1,$s3 # tp2^tp1^tp8
rll $s1,$s1,8
xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
xr $s2,$s3 # tp4^tp1^tp8
rll $s2,$s2,16
xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
rll $s3,$s3,24
xr $s0,$s3 # ^= ROTATE(tp8,8)
st $s0,16($key)
la $key,4($key)
brct $rounds,.Lmix
lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
lghi %r2,0
br $ra
.size AES_set_decrypt_key,.-AES_set_decrypt_key
.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;