openssl/crypto/aes/asm/aes-s390x.pl

1518 lines
36 KiB
Perl
Raw Normal View History

2007-04-30 08:42:54 +00:00
#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# AES for s390x.
# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and issued instruction [in this case
# load value from memory is critical] has to complete before execution
2007-05-28 16:30:18 +00:00
# flow proceeds. S-boxes are compressed to 2KB[+256B].
2007-04-30 08:42:54 +00:00
#
# As for hardware acceleration support. It's basically a "teaser," as
# it can and should be improved in several ways. Most notably support
# for CBC is not utilized, nor multiple blocks are ever processed.
# Then software key schedule can be postponed till hardware support
# detection... Performance improvement over assembler is reportedly
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
2007-04-30 08:42:54 +00:00
# support is implemented.
2007-05-28 16:30:18 +00:00
# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.
2009-02-09 15:42:04 +00:00
# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
2009-03-17 20:04:11 +00:00
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# critical path is not long enough. Yet it spends ~24 cycles per byte
# processed with 128-bit key.
2009-02-09 15:42:04 +00:00
#
# Unlike previous version hardware support detection takes place only
# at the moment of key schedule setup, which is denoted in key->rounds.
# This is done, because deferred key setup can't be made MT-safe, not
# for keys longer than 128 bits.
2009-02-09 15:42:04 +00:00
#
# Add AES_cbc_encrypt, which gives incredible performance improvement,
# it was measured to be ~6.6x. It's less than previously mentioned 8x,
# because software implementation was optimized.
# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over "generic" counter mode routine relying
# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
# to the fact that exact throughput value depends on current stack
# frame alignment within 4KB page. In worst case you get ~75% of the
# maximum, but *on average* it would be as much as ~98%. Meaning that
# worst case is unlikely, it's like hitting ravine on plateau.
# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 2x better than code generated by gcc 4.3.
# Command line: <flavour> [other arguments...] <output-file>
#
# A flavour matching /3[12]/ selects 4-byte stack slots and the plain
# (non-"g") forms of the load/store/compare mnemonics that are spelled
# with ${g} below; any other flavour selects 8-byte slots and the "g"
# forms.
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Skip arguments until one looks like a file name ("name.ext"); $output is
# left false when no such argument exists.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect the generated assembly only when an output file was actually
# named, and fail loudly if it cannot be created.  The three-argument form
# keeps characters in the file name from being interpreted as an open mode.
if ($output) {
	open STDOUT,">",$output or die "can't open $output: $!";
}

$softonly=0;	# allow hardware support
2009-02-09 15:42:04 +00:00
# Symbolic names for the general registers used by the generated code.
# Several names deliberately share a register:
#   %r0: $t0 and $mask
#   %r2: $t2 and $inp
#   %r3: $t3, $out and $bits
# so a routine that uses one alias as scratch destroys the others.
$t0="%r0";	$mask="%r0";
$t1="%r1";
$t2="%r2";	$inp="%r2";
$t3="%r3";	$out="%r3";	$bits="%r3";

$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

# Stack frame size: 16 register-sized slots plus 4*8 bytes, i.e. 160 bytes
# when $SIZE_T is 8 and 96 bytes when it is 4.
$stdframe=16*$SIZE_T+4*8;
2007-04-30 08:42:54 +00:00
# Append one ".long 0xXXXXXXXX,0xXXXXXXXX" line to $code for every argument,
# stopping at the first undefined one.  Each word is emitted twice so that a
# table entry occupies 8 bytes; the round code in this file then reads the
# same entry with 4-byte loads at byte displacements 0..3 to obtain the
# byte-rotated variants (commented as Te0..Te3 / Td0..Td3 there).
#
# No prototype is declared: the sub takes a list, and the existing
# "&_data_word(...)" calls keep working unchanged.
sub _data_word
{
	for my $word (@_) {
		last unless defined($word);
		$code.=sprintf(".long\t0x%08x,0x%08x\n",$word,$word);
	}
}
# Start of the generated output.  This is the first assignment to $code
# (plain "=", everything after it appends): it switches to the text section
# and opens the 256-byte aligned AES_Te object whose entries are appended by
# the _data_word call that follows.
$code=<<___;
.text
.type AES_Te,\@object
2009-02-09 15:42:04 +00:00
.align 256
2007-04-30 08:42:54 +00:00
AES_Te:
___
&_data_word(
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Remainder of the AES_Te object and the AES_encrypt entry label:
#   - Te4: 256 single bytes.  It follows the 256 eight-byte entries emitted
#     above, i.e. it starts 2048 bytes into AES_Te (the key schedule code
#     addresses it as AES_Te+2048).
#   - rcon: ten round constants padded with zeros to 16 words; the key
#     schedule reads it at displacement 256 from Te4.
# The object is closed with .size, then AES_encrypt is declared global and
# its label is emitted; the function body comes from the next heredocs.
$code.=<<___;
2007-05-28 16:30:18 +00:00
# Te4[256]
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
2009-02-09 15:42:04 +00:00
.align 256
2007-04-30 08:42:54 +00:00
.size AES_Te,.-AES_Te
2007-05-28 16:30:18 +00:00
# void AES_encrypt(const unsigned char *inp, unsigned char *out,
2007-04-30 08:42:54 +00:00
# const AES_KEY *key) {
.globl AES_encrypt
.type AES_encrypt,\@function
AES_encrypt:
___
# AES_encrypt, hardware path (left out of the output when $softonly is set).
#
# The 32-bit word at offset 240 of the key structure is compared, unsigned,
# with 16.  Values below 16 branch to the table-driven code at .Lesoft.
# Any other value is left in %r0 and used as the function code of the KM
# instruction, which is emitted as a raw .long.  Operands set up here:
#   %r1 = key (copied first, because %r4 is then overwritten)
#   %r2 = input (already there as the first argument)
#   %r4 = output, %r3 = 16 (one block)
# "brc 1,.-4" branches back to the KM instruction while condition code 3 is
# set.
$code.=<<___ if (!$softonly);
2009-02-09 15:42:04 +00:00
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Lesoft
2007-04-30 08:42:54 +00:00
la %r1,0($key)
2007-05-03 09:12:47 +00:00
#la %r2,0($inp)
2007-04-30 08:42:54 +00:00
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
2009-02-09 15:42:04 +00:00
brc 1,.-4 # can this happen?
br %r14
.align 64
2007-04-30 08:42:54 +00:00
.Lesoft:
___
# AES_encrypt software path followed by the internal _s390x_AES_encrypt.
#
# AES_encrypt (software part):
#   saves %r3..%r14 starting at slot 3 of the current frame, loads the four
#   input words zero-extended into s0..s3, points tbl at AES_Te and calls
#   _s390x_AES_encrypt.  Afterwards the out pointer is reloaded from its
#   save slot (its register doubles as scratch t3), the four state words are
#   stored and %r6..%r14 are restored.
#
# _s390x_AES_encrypt:
#   in:  s0..s3 = state, key = key schedule, tbl = AES_Te
#   out: s0..s3 = encrypted state
#   The return address is parked at ($stdframe-$SIZE_T)($sp) because ra is
#   used as an index register inside the rounds.  The state is XORed with
#   the first round key, then .Lenc_loop runs (word at 240(key)) - 1 times.
#   Indices are kept pre-multiplied by 8 (mask is 0xff<<3) to match the
#   8-byte table entries, and loads at displacements 0..3 select the
#   variants commented as Te0, Te3, Te2 and Te1.  key advances by 16 per
#   iteration.
#   The last round uses single-byte loads at displacement 2 of the same
#   entries (commented as Te4), shifts them into place and XORs the result
#   with the round key at 16..28(key).
#   Overwrites t0..t3, i1..i3, key and rounds; the instruction order is
#   hand-scheduled (see the notes at the top of the file), so do not reorder
#   casually.
$code.=<<___;
stm${g} %r3,$ra,3*$SIZE_T($sp)
2007-04-30 08:42:54 +00:00
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
2009-02-09 15:42:04 +00:00
larl $tbl,AES_Te
2007-04-30 08:42:54 +00:00
bras $ra,_s390x_AES_encrypt
l${g} $out,3*$SIZE_T($sp)
2007-04-30 08:42:54 +00:00
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lm${g} %r6,$ra,6*$SIZE_T($sp)
2007-05-28 16:30:18 +00:00
br $ra
2007-04-30 08:42:54 +00:00
.size AES_encrypt,.-AES_encrypt
.type _s390x_AES_encrypt,\@function
.align 16
_s390x_AES_encrypt:
st${g} $ra,`$stdframe-$SIZE_T`($sp)
2007-04-30 08:42:54 +00:00
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
2009-02-09 15:42:04 +00:00
llill $mask,`0xff<<3`
2007-04-30 08:42:54 +00:00
aghi $rounds,-1
2009-02-09 15:42:04 +00:00
j .Lenc_loop
.align 16
2007-04-30 08:42:54 +00:00
.Lenc_loop:
2009-02-09 15:42:04 +00:00
sllg $t1,$s0,`0+3`
srlg $t2,$s0,`8-3`
srlg $t3,$s0,`16-3`
2007-04-30 08:42:54 +00:00
srl $s0,`24-3`
nr $s0,$mask
2009-02-09 15:42:04 +00:00
ngr $t1,$mask
nr $t2,$mask
nr $t3,$mask
2007-04-30 08:42:54 +00:00
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
ngr $i2,$mask
nr $i3,$mask
2009-02-09 15:42:04 +00:00
l $s0,0($s0,$tbl) # Te0[s0>>24]
l $t1,1($t1,$tbl) # Te3[s0>>0]
l $t2,2($t2,$tbl) # Te2[s0>>8]
l $t3,3($t3,$tbl) # Te1[s0>>16]
2007-04-30 08:42:54 +00:00
x $s0,3($i1,$tbl) # Te1[s1>>16]
l $s1,0($s1,$tbl) # Te0[s1>>24]
x $t2,1($i2,$tbl) # Te3[s1>>0]
x $t3,2($i3,$tbl) # Te2[s1>>8]
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
nr $i2,$mask
2009-02-09 15:42:04 +00:00
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
2007-04-30 08:42:54 +00:00
nr $s2,$mask
ngr $i3,$mask
2009-02-09 15:42:04 +00:00
xr $s1,$t1
srlg $ra,$s3,`8-3` # i1
sllg $t1,$s3,`0+3` # i0
nr $ra,$mask
la $key,16($key)
ngr $t1,$mask
2007-04-30 08:42:54 +00:00
x $s0,2($i1,$tbl) # Te2[s2>>8]
x $s1,3($i2,$tbl) # Te1[s2>>16]
l $s2,0($s2,$tbl) # Te0[s2>>24]
x $t3,1($i3,$tbl) # Te3[s2>>0]
srlg $i3,$s3,`16-3` # i2
2009-02-09 15:42:04 +00:00
xr $s2,$t2
2007-04-30 08:42:54 +00:00
srl $s3,`24-3`
nr $i3,$mask
nr $s3,$mask
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
2009-02-09 15:42:04 +00:00
x $t3,12($key)
x $s0,1($t1,$tbl) # Te3[s3>>0]
x $s1,2($ra,$tbl) # Te2[s3>>8]
x $s2,3($i3,$tbl) # Te1[s3>>16]
l $s3,0($s3,$tbl) # Te0[s3>>24]
xr $s3,$t3
2007-04-30 08:42:54 +00:00
brct $rounds,.Lenc_loop
2009-02-09 15:42:04 +00:00
.align 16
2007-04-30 08:42:54 +00:00
2009-02-09 15:42:04 +00:00
sllg $t1,$s0,`0+3`
srlg $t2,$s0,`8-3`
ngr $t1,$mask
srlg $t3,$s0,`16-3`
2007-04-30 08:42:54 +00:00
srl $s0,`24-3`
nr $s0,$mask
2009-02-09 15:42:04 +00:00
nr $t2,$mask
nr $t3,$mask
2007-04-30 08:42:54 +00:00
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
2009-02-09 15:42:04 +00:00
ngr $i2,$mask
2007-04-30 08:42:54 +00:00
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
nr $i3,$mask
2009-02-09 15:42:04 +00:00
llgc $s0,2($s0,$tbl) # Te4[s0>>24]
llgc $t1,2($t1,$tbl) # Te4[s0>>0]
sll $s0,24
llgc $t2,2($t2,$tbl) # Te4[s0>>8]
llgc $t3,2($t3,$tbl) # Te4[s0>>16]
sll $t2,8
sll $t3,16
2007-04-30 08:42:54 +00:00
llgc $i1,2($i1,$tbl) # Te4[s1>>16]
llgc $s1,2($s1,$tbl) # Te4[s1>>24]
llgc $i2,2($i2,$tbl) # Te4[s1>>0]
llgc $i3,2($i3,$tbl) # Te4[s1>>8]
sll $i1,16
sll $s1,24
sll $i3,8
or $s0,$i1
or $s1,$t1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
nr $i2,$mask
2009-02-09 15:42:04 +00:00
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
2007-04-30 08:42:54 +00:00
ngr $i3,$mask
2009-02-09 15:42:04 +00:00
nr $s2,$mask
sllg $t1,$s3,`0+3` # i0
srlg $ra,$s3,`8-3` # i1
ngr $t1,$mask
2007-04-30 08:42:54 +00:00
llgc $i1,2($i1,$tbl) # Te4[s2>>8]
llgc $i2,2($i2,$tbl) # Te4[s2>>16]
2009-02-09 15:42:04 +00:00
sll $i1,8
2007-04-30 08:42:54 +00:00
llgc $s2,2($s2,$tbl) # Te4[s2>>24]
llgc $i3,2($i3,$tbl) # Te4[s2>>0]
sll $i2,16
2009-02-09 15:42:04 +00:00
nr $ra,$mask
2007-04-30 08:42:54 +00:00
sll $s2,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $t3,$i3
srlg $i3,$s3,`16-3` # i2
srl $s3,`24-3`
nr $i3,$mask
nr $s3,$mask
2009-02-09 15:42:04 +00:00
l $t0,16($key)
l $t2,20($key)
llgc $i1,2($t1,$tbl) # Te4[s3>>0]
llgc $i2,2($ra,$tbl) # Te4[s3>>8]
2007-04-30 08:42:54 +00:00
llgc $i3,2($i3,$tbl) # Te4[s3>>16]
llgc $s3,2($s3,$tbl) # Te4[s3>>24]
sll $i2,8
sll $i3,16
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$i3
or $s3,$t3
l${g} $ra,`$stdframe-$SIZE_T`($sp)
2009-02-09 15:42:04 +00:00
xr $s0,$t0
xr $s1,$t2
2007-04-30 08:42:54 +00:00
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
___
# Open the 256-byte aligned AES_Td object; its entries are appended by the
# _data_word call that follows, in the same doubled-word layout as AES_Te.
$code.=<<___;
.type AES_Td,\@object
2009-02-09 15:42:04 +00:00
.align 256
2007-04-30 08:42:54 +00:00
AES_Td:
___
&_data_word(
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Remainder of the AES_Td object and the AES_decrypt entry label.
# Td4 is a table of 256 single bytes that follows the 256 eight-byte
# entries emitted above; the decryption code addresses it at displacement
# 2048 from AES_Td.  The object is then closed with .size, AES_decrypt is
# declared global and its label is emitted; the function body comes from
# the next heredocs.
$code.=<<___;
2007-05-28 16:30:18 +00:00
# Td4[256]
2007-04-30 08:42:54 +00:00
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size AES_Td,.-AES_Td
2007-05-28 16:30:18 +00:00
# void AES_decrypt(const unsigned char *inp, unsigned char *out,
2007-04-30 08:42:54 +00:00
# const AES_KEY *key) {
.globl AES_decrypt
.type AES_decrypt,\@function
AES_decrypt:
___
# AES_decrypt, hardware path (left out of the output when $softonly is set).
#
# Same sequence as the hardware path of AES_encrypt: the word at offset 240
# of the key structure is compared, unsigned, with 16; smaller values branch
# to the table-driven code at .Ldsoft, anything else stays in %r0 as the KM
# function code.  %r1 = key (copied before %r4 is overwritten), %r2 = input,
# %r4 = output, %r3 = 16.  Nothing here selects the direction; presumably
# that comes from the "decrypt" bit AES_set_decrypt_key ORs into the stored
# code -- NOTE(review): confirm against the KM function-code definition.
$code.=<<___ if (!$softonly);
2009-02-09 15:42:04 +00:00
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Ldsoft
la %r1,0($key)
2007-05-03 09:12:47 +00:00
#la %r2,0($inp)
2007-04-30 08:42:54 +00:00
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
2009-02-09 15:42:04 +00:00
brc 1,.-4 # can this happen?
br %r14
.align 64
2007-04-30 08:42:54 +00:00
.Ldsoft:
___
# AES_decrypt software path followed by the internal _s390x_AES_decrypt.
#
# AES_decrypt (software part):
#   saves %r3..%r14 starting at slot 3 of the current frame, loads the four
#   input words zero-extended into s0..s3, points tbl at AES_Td and calls
#   _s390x_AES_decrypt.  Afterwards the out pointer is reloaded from its
#   save slot, the state is stored and %r6..%r14 are restored.
#
# _s390x_AES_decrypt:
#   in:  s0..s3 = state, key = key schedule, tbl = AES_Td
#   out: s0..s3 = decrypted state
#   The return address is parked at ($stdframe-$SIZE_T)($sp) because ra is
#   used as an index register.  After the initial round-key XOR,
#   .Ldec_loop runs (word at 240(key)) - 1 times with indices pre-multiplied
#   by 8 (mask 0xff<<3) and loads at displacements 0..3 selecting the
#   variants commented as Td0, Td3, Td2 and Td1; key advances by 16 per
#   iteration.
#   The last round first touches Td4 at four 64-byte strides ("prefetch"),
#   reloads mask with 0xff and uses plain byte indices into Td4 at
#   displacement 2048, then XORs with the round key at 16..28(key).
#   Overwrites t0..t3, i1..i3, key and rounds; the instruction order is
#   hand-scheduled, so do not reorder casually.
$code.=<<___;
stm${g} %r3,$ra,3*$SIZE_T($sp)
2007-04-30 08:42:54 +00:00
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
2009-02-09 15:42:04 +00:00
larl $tbl,AES_Td
2007-04-30 08:42:54 +00:00
bras $ra,_s390x_AES_decrypt
l${g} $out,3*$SIZE_T($sp)
2007-04-30 08:42:54 +00:00
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lm${g} %r6,$ra,6*$SIZE_T($sp)
2007-05-28 16:30:18 +00:00
br $ra
2007-04-30 08:42:54 +00:00
.size AES_decrypt,.-AES_decrypt
.type _s390x_AES_decrypt,\@function
.align 16
_s390x_AES_decrypt:
st${g} $ra,`$stdframe-$SIZE_T`($sp)
2007-04-30 08:42:54 +00:00
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
2009-02-09 15:42:04 +00:00
llill $mask,`0xff<<3`
2007-04-30 08:42:54 +00:00
aghi $rounds,-1
2009-02-09 15:42:04 +00:00
j .Ldec_loop
.align 16
2007-04-30 08:42:54 +00:00
.Ldec_loop:
2009-02-09 15:42:04 +00:00
srlg $t1,$s0,`16-3`
srlg $t2,$s0,`8-3`
sllg $t3,$s0,`0+3`
2007-04-30 08:42:54 +00:00
srl $s0,`24-3`
nr $s0,$mask
2009-02-09 15:42:04 +00:00
nr $t1,$mask
nr $t2,$mask
ngr $t3,$mask
2007-04-30 08:42:54 +00:00
sllg $i1,$s1,`0+3` # i0
srlg $i2,$s1,`16-3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
ngr $i1,$mask
nr $s1,$mask
nr $i2,$mask
nr $i3,$mask
2009-02-09 15:42:04 +00:00
l $s0,0($s0,$tbl) # Td0[s0>>24]
l $t1,3($t1,$tbl) # Td1[s0>>16]
l $t2,2($t2,$tbl) # Td2[s0>>8]
l $t3,1($t3,$tbl) # Td3[s0>>0]
2007-04-30 08:42:54 +00:00
x $s0,1($i1,$tbl) # Td3[s1>>0]
l $s1,0($s1,$tbl) # Td0[s1>>24]
x $t2,3($i2,$tbl) # Td1[s1>>16]
x $t3,2($i3,$tbl) # Td2[s1>>8]
srlg $i1,$s2,`8-3` # i0
sllg $i2,$s2,`0+3` # i1
srlg $i3,$s2,`16-3`
srl $s2,`24-3`
nr $i1,$mask
ngr $i2,$mask
nr $s2,$mask
nr $i3,$mask
2009-02-09 15:42:04 +00:00
xr $s1,$t1
srlg $ra,$s3,`8-3` # i1
srlg $t1,$s3,`16-3` # i0
nr $ra,$mask
la $key,16($key)
nr $t1,$mask
2007-04-30 08:42:54 +00:00
x $s0,2($i1,$tbl) # Td2[s2>>8]
x $s1,1($i2,$tbl) # Td3[s2>>0]
l $s2,0($s2,$tbl) # Td0[s2>>24]
x $t3,3($i3,$tbl) # Td1[s2>>16]
sllg $i3,$s3,`0+3` # i2
srl $s3,`24-3`
ngr $i3,$mask
nr $s3,$mask
2009-02-09 15:42:04 +00:00
xr $s2,$t2
2007-04-30 08:42:54 +00:00
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
2009-02-09 15:42:04 +00:00
x $t3,12($key)
x $s0,3($t1,$tbl) # Td1[s3>>16]
x $s1,2($ra,$tbl) # Td2[s3>>8]
x $s2,1($i3,$tbl) # Td3[s3>>0]
l $s3,0($s3,$tbl) # Td0[s3>>24]
xr $s3,$t3
2007-04-30 08:42:54 +00:00
brct $rounds,.Ldec_loop
2009-02-09 15:42:04 +00:00
.align 16
2007-04-30 08:42:54 +00:00
l $t1,`2048+0`($tbl) # prefetch Td4
2009-02-09 15:42:04 +00:00
l $t2,`2048+64`($tbl)
l $t3,`2048+128`($tbl)
l $i1,`2048+192`($tbl)
2007-04-30 08:42:54 +00:00
llill $mask,0xff
srlg $i3,$s0,24 # i0
2009-02-09 15:42:04 +00:00
srlg $t1,$s0,16
srlg $t2,$s0,8
2007-04-30 08:42:54 +00:00
nr $s0,$mask # i3
2009-02-09 15:42:04 +00:00
nr $t1,$mask
srlg $i1,$s1,24
nr $t2,$mask
srlg $i2,$s1,16
srlg $ra,$s1,8
nr $s1,$mask # i0
2007-04-30 08:42:54 +00:00
nr $i2,$mask
2009-02-09 15:42:04 +00:00
nr $ra,$mask
2007-04-30 08:42:54 +00:00
llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
2009-02-09 15:42:04 +00:00
llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
sll $t1,16
2007-04-30 08:42:54 +00:00
llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
sllg $s0,$i3,24
sll $t2,8
llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
sll $i1,24
2009-02-09 15:42:04 +00:00
llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
2007-04-30 08:42:54 +00:00
sll $i2,16
sll $i3,8
or $s0,$s1
or $t1,$i1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,8 # i0
srlg $i2,$s2,24
srlg $i3,$s2,16
nr $s2,$mask # i1
nr $i1,$mask
nr $i3,$mask
llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
sll $i1,8
sll $i2,24
or $s0,$i1
2009-02-09 15:42:04 +00:00
sll $i3,16
2007-04-30 08:42:54 +00:00
or $t2,$i2
or $t3,$i3
srlg $i1,$s3,16 # i0
srlg $i2,$s3,8 # i1
srlg $i3,$s3,24
nr $s3,$mask # i2
nr $i1,$mask
nr $i2,$mask
2009-02-09 15:42:04 +00:00
l${g} $ra,`$stdframe-$SIZE_T`($sp)
2009-02-09 15:42:04 +00:00
or $s1,$t1
l $t0,16($key)
l $t1,20($key)
2007-04-30 08:42:54 +00:00
llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
2009-02-09 15:42:04 +00:00
sll $i1,16
2007-04-30 08:42:54 +00:00
llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
sll $i2,8
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $s3,$t3
2009-02-09 15:42:04 +00:00
xr $s0,$t0
xr $s1,$t1
2007-04-30 08:42:54 +00:00
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
2009-02-09 15:42:04 +00:00
___
2007-05-28 16:30:18 +00:00
2009-02-09 15:42:04 +00:00
# AES_set_encrypt_key(in, bits, key): entry point and argument checks.
#
# Jumps to .Lminus1 (defined further down; returns -1) when in or key is
# zero, returns -2 in %r2 when bits is not 128, 192 or 256, and otherwise
# continues at .Lproceed.  The pointer compares use the ${g}-selected width;
# bits is always compared as a 32-bit value.
$code.=<<___;
2007-05-28 16:30:18 +00:00
# void AES_set_encrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_encrypt_key
.type AES_set_encrypt_key,\@function
.align 16
AES_set_encrypt_key:
2009-02-09 15:42:04 +00:00
lghi $t0,0
cl${g}r $inp,$t0
2007-05-28 16:30:18 +00:00
je .Lminus1
cl${g}r $key,$t0
2007-05-28 16:30:18 +00:00
je .Lminus1
2009-02-09 15:42:04 +00:00
lghi $t0,128
clr $bits,$t0
je .Lproceed
lghi $t0,192
clr $bits,$t0
je .Lproceed
lghi $t0,256
clr $bits,$t0
je .Lproceed
2007-05-28 16:30:18 +00:00
lghi %r2,-2
br %r14
2009-02-09 15:42:04 +00:00
.align 16
.Lproceed:
___
# AES_set_encrypt_key, hardware detection (left out when $softonly is set).
#
# %r5 = 18 + (bits-128)/64, i.e. 18, 19 or 20 for 128/192/256-bit keys.
# The software schedule at .Lekey_internal is used when either
#   - the OPENSSL_s390xcap_P bit tested by "tmhl 0x4000" is clear, or
#   - the KMC query (function code 0, result written at 16(sp)) does not
#     have bit number %r5, counted from the most significant bit of the
#     first doubleword, set.
# Otherwise the raw key is copied into the key structure (16 bytes, plus 8
# more for 192 bits and 16 more for 256 bits; the "je 1f" still sees the
# condition code of the earlier "cr"), bits is stored at offset 236, the
# function code at offset 240, and 0 is returned.
$code.=<<___ if (!$softonly);
2009-02-09 15:42:04 +00:00
# convert bits to km code, [128,192,256]->[18,19,20]
lhi %r5,-128
lhi %r0,18
ar %r5,$bits
srl %r5,6
ar %r5,%r0
larl %r1,OPENSSL_s390xcap_P
lg %r0,0(%r1)
tmhl %r0,0x4000 # check for message-security assist
jz .Lekey_internal
2007-05-28 16:30:18 +00:00
lghi %r0,0 # query capability vector
la %r1,16($sp)
2009-02-09 15:42:04 +00:00
.long 0xb92f0042 # kmc %r4,%r2
llihh %r1,0x8000
srlg %r1,%r1,0(%r5)
ng %r1,16($sp)
2007-05-28 16:30:18 +00:00
jz .Lekey_internal
2009-02-09 15:42:04 +00:00
lmg %r0,%r1,0($inp) # just copy 128 bits...
stmg %r0,%r1,0($key)
lhi %r0,192
cr $bits,%r0
jl 1f
lg %r1,16($inp)
stg %r1,16($key)
je 1f
lg %r1,24($inp)
stg %r1,24($key)
1: st $bits,236($key) # save bits
st %r5,240($key) # save km code
2007-05-28 16:30:18 +00:00
lghi %r2,0
br %r14
___
# Software key schedule (.Lekey_internal), the end of AES_set_encrypt_key,
# and the entry sequence of AES_set_decrypt_key.
#
# .Lekey_internal saves %r6..%r13, points tbl at AES_Te+2048 (Te4; rcon is
# read at displacement 256 from it), copies the first four key words and
# dispatches on bits:
#   128: stores 10 at 240(key), .L128_loop runs 10 times
#   192: stores 12 at 240(key), .L192_loop with a counter of 8
#   256: stores 14 at 240(key), .L256_loop with a counter of 7
# In each loop the "icm" sequence builds the S-box substituted and rotated
# word from four Te4 bytes, t3 is the byte offset into rcon, and key is
# advanced as round keys are written -- so key no longer points at the start
# of the structure when the routine returns.  Every path restores %r6..%r13
# and returns 0 in %r2; .Lminus1 returns -1.
#
# AES_set_decrypt_key starts by saving key and the return address in slots
# 4 and 14 of the frame, calls AES_set_encrypt_key, reloads both, and
# returns immediately if the result in %r2 is non-zero.
$code.=<<___;
2007-05-28 16:30:18 +00:00
.align 16
.Lekey_internal:
stm${g} %r6,%r13,6*$SIZE_T($sp) # all non-volatile regs
2007-05-28 16:30:18 +00:00
larl $tbl,AES_Te+2048
2007-05-28 16:30:18 +00:00
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
st $s0,0($key)
st $s1,4($key)
st $s2,8($key)
st $s3,12($key)
2009-02-09 15:42:04 +00:00
lghi $t0,128
cr $bits,$t0
2007-05-28 16:30:18 +00:00
jne .Lnot128
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,10
st $rounds,240($key)
llgfr $t2,$s3 # temp=rk[3]
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
nr $t2,$mask
nr $i1,$mask
nr $i2,$mask
2009-02-09 15:42:04 +00:00
.align 16
.L128_loop:
2007-05-28 16:30:18 +00:00
la $t2,0($t2,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
icm $t2,1,0($i3) # Te4[rk[3]>>24]
x $t2,256($t3,$tbl) # rcon[i]
xr $s0,$t2 # rk[4]=rk[0]^...
xr $s1,$s0 # rk[5]=rk[1]^rk[4]
xr $s2,$s1 # rk[6]=rk[2]^rk[5]
xr $s3,$s2 # rk[7]=rk[3]^rk[6]
2009-02-09 15:42:04 +00:00
llgfr $t2,$s3 # temp=rk[3]
srlg $i1,$s3,8
srlg $i2,$s3,16
nr $t2,$mask
nr $i1,$mask
srlg $i3,$s3,24
nr $i2,$mask
2007-05-28 16:30:18 +00:00
st $s0,16($key)
st $s1,20($key)
st $s2,24($key)
st $s3,28($key)
la $key,16($key) # key+=4
la $t3,4($t3) # i++
brct $rounds,.L128_loop
lghi %r2,0
lm${g} %r6,%r13,6*$SIZE_T($sp)
2007-05-28 16:30:18 +00:00
br $ra
2009-02-09 15:42:04 +00:00
.align 16
2007-05-28 16:30:18 +00:00
.Lnot128:
2009-02-09 15:42:04 +00:00
llgf $t0,16($inp)
llgf $t1,20($inp)
st $t0,16($key)
st $t1,20($key)
lghi $t0,192
cr $bits,$t0
2007-05-28 16:30:18 +00:00
jne .Lnot192
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,12
st $rounds,240($key)
lghi $rounds,8
2009-02-09 15:42:04 +00:00
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
2007-05-28 16:30:18 +00:00
nr $i1,$mask
nr $i2,$mask
2009-02-09 15:42:04 +00:00
.align 16
.L192_loop:
la $t1,0($t1,$tbl)
2007-05-28 16:30:18 +00:00
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
2009-02-09 15:42:04 +00:00
icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
icm $t1,1,0($i3) # Te4[rk[5]>>24]
x $t1,256($t3,$tbl) # rcon[i]
xr $s0,$t1 # rk[6]=rk[0]^...
2007-05-28 16:30:18 +00:00
xr $s1,$s0 # rk[7]=rk[1]^rk[6]
xr $s2,$s1 # rk[8]=rk[2]^rk[7]
xr $s3,$s2 # rk[9]=rk[3]^rk[8]
2009-02-09 15:42:04 +00:00
2007-05-28 16:30:18 +00:00
st $s0,24($key)
st $s1,28($key)
st $s2,32($key)
st $s3,36($key)
brct $rounds,.L192_continue
lghi %r2,0
lm${g} %r6,%r13,6*$SIZE_T($sp)
2007-05-28 16:30:18 +00:00
br $ra
2009-02-09 15:42:04 +00:00
.align 16
2007-05-28 16:30:18 +00:00
.L192_continue:
2009-02-09 15:42:04 +00:00
lgr $t1,$s3
x $t1,16($key) # rk[10]=rk[4]^rk[9]
st $t1,40($key)
x $t1,20($key) # rk[11]=rk[5]^rk[10]
st $t1,44($key)
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
2007-05-28 16:30:18 +00:00
la $key,24($key) # key+=6
la $t3,4($t3) # i++
j .L192_loop
2009-02-09 15:42:04 +00:00
.align 16
2007-05-28 16:30:18 +00:00
.Lnot192:
2009-02-09 15:42:04 +00:00
llgf $t0,24($inp)
llgf $t1,28($inp)
st $t0,24($key)
st $t1,28($key)
2007-05-28 16:30:18 +00:00
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,14
st $rounds,240($key)
lghi $rounds,7
2009-02-09 15:42:04 +00:00
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
2007-05-28 16:30:18 +00:00
nr $i1,$mask
nr $i2,$mask
2009-02-09 15:42:04 +00:00
.align 16
.L256_loop:
la $t1,0($t1,$tbl)
2007-05-28 16:30:18 +00:00
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
2009-02-09 15:42:04 +00:00
icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
icm $t1,1,0($i3) # Te4[rk[7]>>24]
x $t1,256($t3,$tbl) # rcon[i]
xr $s0,$t1 # rk[8]=rk[0]^...
2007-05-28 16:30:18 +00:00
xr $s1,$s0 # rk[9]=rk[1]^rk[8]
xr $s2,$s1 # rk[10]=rk[2]^rk[9]
xr $s3,$s2 # rk[11]=rk[3]^rk[10]
st $s0,32($key)
st $s1,36($key)
st $s2,40($key)
st $s3,44($key)
brct $rounds,.L256_continue
lghi %r2,0
lm${g} %r6,%r13,6*$SIZE_T($sp)
2007-05-28 16:30:18 +00:00
br $ra
2009-02-09 15:42:04 +00:00
.align 16
2007-05-28 16:30:18 +00:00
.L256_continue:
2009-02-09 15:42:04 +00:00
lgr $t1,$s3 # temp=rk[11]
2007-05-28 16:30:18 +00:00
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
2009-02-09 15:42:04 +00:00
nr $t1,$mask
2007-05-28 16:30:18 +00:00
nr $i1,$mask
nr $i2,$mask
2009-02-09 15:42:04 +00:00
la $t1,0($t1,$tbl)
2007-05-28 16:30:18 +00:00
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
2009-02-09 15:42:04 +00:00
llgc $t1,0($t1) # Te4[rk[11]>>0]
icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
x $t1,16($key) # rk[12]=rk[4]^...
st $t1,48($key)
x $t1,20($key) # rk[13]=rk[5]^rk[12]
st $t1,52($key)
x $t1,24($key) # rk[14]=rk[6]^rk[13]
st $t1,56($key)
x $t1,28($key) # rk[15]=rk[7]^rk[14]
st $t1,60($key)
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
2007-05-28 16:30:18 +00:00
la $key,32($key) # key+=8
la $t3,4($t3) # i++
j .L256_loop
2009-02-09 15:42:04 +00:00
2007-05-28 16:30:18 +00:00
.Lminus1:
lghi %r2,-1
2009-02-09 15:42:04 +00:00
br $ra
2007-05-28 16:30:18 +00:00
.size AES_set_encrypt_key,.-AES_set_encrypt_key
# void AES_set_decrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_decrypt_key
.type AES_set_decrypt_key,\@function
.align 16
AES_set_decrypt_key:
st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers!
2007-05-28 16:30:18 +00:00
bras $ra,AES_set_encrypt_key
l${g} $key,4*$SIZE_T($sp)
l${g} $ra,14*$SIZE_T($sp)
2007-05-28 16:30:18 +00:00
ltgr %r2,%r2
bnzr $ra
___
# AES_set_decrypt_key, hardware case (left out when $softonly is set).
#
# If the word at 240(key) is below 16 (signed compare) the software schedule
# was produced and control continues at .Lgo.  Otherwise the value is a
# stored function code: bit 0x80 (the "decrypt" bit) is ORed into it, it is
# written back, and the routine returns with %r2 still 0.
#
# .Ldkey_internal re-runs .Lekey_internal with key and the return address
# preserved and then falls through to .Lgo.  NOTE(review): nothing in the
# code visible here branches to this label -- confirm whether it is still
# needed.
$code.=<<___ if (!$softonly);
2009-02-09 15:42:04 +00:00
l $t0,240($key)
lhi $t1,16
cr $t0,$t1
jl .Lgo
oill $t0,0x80 # set "decrypt" bit
st $t0,240($key)
2007-05-28 16:30:18 +00:00
br $ra
.align 16
.Ldkey_internal:
st${g} $key,4*$SIZE_T($sp)
st${g} $ra,14*$SIZE_T($sp)
2007-05-28 16:30:18 +00:00
bras $ra,.Lekey_internal
l${g} $key,4*$SIZE_T($sp)
l${g} $ra,14*$SIZE_T($sp)
___
# .Lgo/.Linv: reverse the order of the 16-byte round keys in place.
# i1 starts at key and moves up by 16, i2 starts at key + 16*rounds and
# moves down by 16 (t1 holds -16); each iteration swaps the two round keys
# they point at.  The loop runs rounds/2 times, rounds being the word at
# 240(key).
$code.=<<___;
2007-05-28 16:30:18 +00:00
.Lgo: llgf $rounds,240($key)
la $i1,0($key)
2007-05-28 16:30:18 +00:00
sllg $i2,$rounds,4
la $i2,0($i2,$key)
2007-05-28 16:30:18 +00:00
srl $rounds,1
2009-02-09 15:42:04 +00:00
lghi $t1,-16
2007-05-28 16:30:18 +00:00
2009-02-09 15:42:04 +00:00
.align 16
.Linv: lmg $s0,$s1,0($i1)
lmg $s2,$s3,0($i2)
stmg $s0,$s1,0($i2)
stmg $s2,$s3,0($i1)
2009-02-09 15:42:04 +00:00
la $i1,16($i1)
la $i2,0($t1,$i2)
2007-05-28 16:30:18 +00:00
brct $rounds,.Linv
___
# Constant-holding registers for the .Lmix loop; they reuse the index
# registers $i1..$i3, which are free at that point.
($mask80,$mask1b,$maskfe)=($i1,$i2,$i3);
# .Lmix: final step of AES_set_decrypt_key's software path.
#
# Processes (rounds-1)*4 words starting at 16(key), i.e. every round key
# except the first and the last.  For each word tp1 the same step is applied
# three times to obtain tp2, tp4 and tp8: it doubles each of the four packed
# bytes in the AES field (high bits selected with 0x80808080, turned into
# 0x1b reduction bytes, XORed with the word shifted left by one and masked
# with 0xfefefefe).  The results are combined with byte rotations as the
# inline comments describe, and the word is stored back in place.
#
# On exit %r6..%r13 are restored from the slots written by the software
# path of AES_set_encrypt_key and 0 is returned in %r2.
$code.=<<___;
llgf $rounds,240($key)
aghi $rounds,-1
sll $rounds,2 # (rounds-1)*4
llilh $mask80,0x8080
llilh $mask1b,0x1b1b
llilh $maskfe,0xfefe
2009-02-09 15:42:04 +00:00
oill $mask80,0x8080
oill $mask1b,0x1b1b
2007-05-28 16:30:18 +00:00
oill $maskfe,0xfefe
2009-02-09 15:42:04 +00:00
.align 16
2007-05-28 16:30:18 +00:00
.Lmix: l $s0,16($key) # tp1
lr $s1,$s0
ngr $s1,$mask80
srlg $t1,$s1,7
slr $s1,$t1
nr $s1,$mask1b
sllg $t1,$s0,1
nr $t1,$maskfe
xr $s1,$t1 # tp2
lr $s2,$s1
ngr $s2,$mask80
srlg $t1,$s2,7
slr $s2,$t1
nr $s2,$mask1b
sllg $t1,$s1,1
nr $t1,$maskfe
xr $s2,$t1 # tp4
lr $s3,$s2
ngr $s3,$mask80
srlg $t1,$s3,7
slr $s3,$t1
nr $s3,$mask1b
sllg $t1,$s2,1
nr $t1,$maskfe
xr $s3,$t1 # tp8
xr $s1,$s0 # tp2^tp1
xr $s2,$s0 # tp4^tp1
rll $s0,$s0,24 # = ROTATE(tp1,8)
2009-02-09 15:42:04 +00:00
xr $s2,$s3 # ^=tp8
2007-05-28 16:30:18 +00:00
xr $s0,$s1 # ^=tp2^tp1
xr $s1,$s3 # tp2^tp1^tp8
2009-02-09 15:42:04 +00:00
xr $s0,$s2 # ^=tp4^tp1^tp8
2007-05-28 16:30:18 +00:00
rll $s1,$s1,8
rll $s2,$s2,16
2009-02-09 15:42:04 +00:00
xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
2007-05-28 16:30:18 +00:00
rll $s3,$s3,24
2009-02-09 15:42:04 +00:00
xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
2007-05-28 16:30:18 +00:00
xr $s0,$s3 # ^= ROTATE(tp8,8)
st $s0,16($key)
la $key,4($key)
brct $rounds,.Lmix
lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
2007-05-28 16:30:18 +00:00
lghi %r2,0
br $ra
.size AES_set_decrypt_key,.-AES_set_decrypt_key
2009-02-09 15:42:04 +00:00
___
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                      size_t length, const AES_KEY *key,
#                      unsigned char *ivec, const int enc)
#
# Emits AES_cbc_encrypt.  On entry %r3 and %r4 are exchanged, so inside
# the routine "out" lives in %r4 and "len" in %r3 (see the my-declarations
# below; they shadow the file-level register names for this block only).
#
# Hardware path (emitted unless $softonly is set): taken when the word at
# 240(key) is not below 16; that word is then used as the kmc function
# code.  The parameter block (ivec followed by the first 32 bytes of the
# key) is built at 16(sp).  Whole blocks are processed by one kmc
# instruction, re-issued while it reports partial completion.  A residue
# of len%16 bytes is handled via a 16-byte scratch area at 16*SIZE_T(sp):
#   - encrypt: the scratch area is zeroed, the residue is copied in with
#     an EX-modified MVC, and one more full block is processed from it;
#   - decrypt (bit 0x80 of the function code set): one block is decrypted
#     into the scratch area and only the residue bytes are copied out.
# Finally the updated ivec is copied from the parameter block to *ivec.
#
# Software path: saves registers, then branches to .Lcbc_decrypt when the
# 32-bit word at stdframe+SIZE_T-4 off sp compares equal to zero
# (presumably the stack-passed "enc" argument -- confirm against the ABI),
# and otherwise chains blocks through _s390x_AES_encrypt with AES_Te.  The
# decrypt side uses _s390x_AES_decrypt with AES_Td.  Short final blocks go
# through the same scratch area as above.
#
# Only assembler text may appear inside the heredocs: every line is
# emitted verbatim into the generated .s file.
{
my $inp="%r2";
my $out="%r4";	# length and out are swapped
my $len="%r3";
my $key="%r5";
my $ivp="%r6";

$code.=<<___;
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,\@function
.align	16
AES_cbc_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if (!$softonly);
	lhi	%r0,16
	cl	%r0,240($key)
	jh	.Lcbc_software

	lg	%r0,0($ivp)	# copy ivec
	lg	%r1,8($ivp)
	stmg	%r0,%r1,16($sp)
	lmg	%r0,%r1,0($key)	# copy key, cover 256 bit
	stmg	%r0,%r1,32($sp)
	lmg	%r0,%r1,16($key)
	stmg	%r0,%r1,48($sp)
	l	%r0,240($key)	# load kmc code
	lghi	$key,15		# res=len%16, len-=res;
	ngr	$key,$len
	sl${g}r	$len,$key
	la	%r1,16($sp)	# parameter block - ivec || key
	jz	.Lkmc_truncated
	.long	0xb92f0042	# kmc %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"
	ltr	$key,$key
	jnz	.Lkmc_truncated
.Lkmc_done:
	lmg	%r0,%r1,16($sp)	# copy ivec to caller
	stg	%r0,0($ivp)
	stg	%r1,8($ivp)
	br	$ra
.align	16
.Lkmc_truncated:
	ahi	$key,-1		# it's the way it's encoded in mvc
	tmll	%r0,0x80
	jnz	.Lkmc_truncated_dec
	lghi	%r1,0
	stg	%r1,16*$SIZE_T($sp)
	stg	%r1,16*$SIZE_T+8($sp)
	bras	%r1,1f
	mvc	16*$SIZE_T(1,$sp),0($inp)
1:	ex	$key,0(%r1)
	la	%r1,16($sp)	# restore parameter block
	la	$inp,16*$SIZE_T($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	j	.Lkmc_done
.align	16
.Lkmc_truncated_dec:
	st${g}	$out,4*$SIZE_T($sp)
	la	$out,16*$SIZE_T($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	l${g}	$out,4*$SIZE_T($sp)
	bras	%r1,2f
	mvc	0(1,$out),16*$SIZE_T($sp)
2:	ex	$key,0(%r1)
	j	.Lkmc_done
.align	16
.Lcbc_software:
___
$code.=<<___;
	stm${g}	$key,$ra,5*$SIZE_T($sp)
	lhi	%r0,0
	cl	%r0,`$stdframe+$SIZE_T-4`($sp)
	je	.Lcbc_decrypt

	larl	$tbl,AES_Te

	llgf	$s0,0($ivp)
	llgf	$s1,4($ivp)
	llgf	$s2,8($ivp)
	llgf	$s3,12($ivp)

	lghi	$t0,16
	sl${g}r	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
.Lcbc_enc_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	x	$s0,0($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_encrypt

	lm${g}	$inp,$key,2*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	la	$inp,16($inp)
	la	$out,16($out)
	lghi	$t0,16
	lt${g}r	$len,$len
	jz	.Lcbc_enc_done
	sl${g}r	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
	j	.Lcbc_enc_loop
.align	16
.Lcbc_enc_done:
	l${g}	$ivp,6*$SIZE_T($sp)
	st	$s0,0($ivp)
	st	$s1,4($ivp)
	st	$s2,8($ivp)
	st	$s3,12($ivp)

	lm${g}	%r7,$ra,7*$SIZE_T($sp)
	br	$ra

.align	16
.Lcbc_enc_tail:
	aghi	$len,15
	lghi	$t0,0
	stg	$t0,16*$SIZE_T($sp)
	stg	$t0,16*$SIZE_T+8($sp)
	bras	$t1,3f
	mvc	16*$SIZE_T(1,$sp),0($inp)
3:	ex	$len,0($t1)
	lghi	$len,0
	la	$inp,16*$SIZE_T($sp)
	j	.Lcbc_enc_loop

.align	16
.Lcbc_decrypt:
	larl	$tbl,AES_Td

	lg	$t0,0($ivp)
	lg	$t1,8($ivp)
	stmg	$t0,$t1,16*$SIZE_T($sp)

.Lcbc_dec_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_decrypt

	lm${g}	$inp,$key,2*$SIZE_T($sp)
	sllg	$s0,$s0,32
	sllg	$s2,$s2,32
	lr	$s0,$s1
	lr	$s2,$s3

	lg	$t0,0($inp)
	lg	$t1,8($inp)
	xg	$s0,16*$SIZE_T($sp)
	xg	$s2,16*$SIZE_T+8($sp)
	lghi	$s1,16
	sl${g}r	$len,$s1
	brc	4,.Lcbc_dec_tail	# if borrow
	brc	2,.Lcbc_dec_done	# if zero
	stg	$s0,0($out)
	stg	$s2,8($out)
	stmg	$t0,$t1,16*$SIZE_T($sp)

	la	$inp,16($inp)
	la	$out,16($out)
	j	.Lcbc_dec_loop

.Lcbc_dec_done:
	stg	$s0,0($out)
	stg	$s2,8($out)
.Lcbc_dec_exit:
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	stmg	$t0,$t1,0($ivp)

	br	$ra

.align	16
.Lcbc_dec_tail:
	aghi	$len,15
	stg	$s0,16*$SIZE_T($sp)
	stg	$s2,16*$SIZE_T+8($sp)
	bras	$s1,4f
	mvc	0(1,$out),16*$SIZE_T($sp)
4:	ex	$len,0($s1)
	j	.Lcbc_dec_exit
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
#                        size_t blocks, const AES_KEY *key,
#                        const unsigned char *ivec)
#
# Emits AES_ctr32_encrypt.  The block count is first zero-extended from
# 32 bits.  The output pointer is converted into an offset relative to
# the input pointer (out -= inp), so one advancing register addresses
# both buffers.
#
# Hardware path (emitted unless $softonly is set): taken when the word at
# 240(key) is not below 16; that word is passed to km as the function
# code in %r0, with %r1 pointing at the key.  A scratch buffer is carved
# out of the stack (aligned down to a 4K boundary, 1K kept in reserve, at
# least 256 bytes usable, per the inline comments).  Each pass
#   1. fills the buffer with consecutive counter blocks (only the low
#      32 bits are incremented),
#   2. encrypts the buffer in place with km,
#   3. xors it with the input and stores the result to the output.
# Before returning, the scratch buffer is overwritten (.Lctr32_hw_zap)
# and the stack pointer is restored from the back-chain.
#
# Software path: one _s390x_AES_encrypt call per block using AES_Te; the
# 32-bit counter word is kept in the stack slot at 16*SIZE_T(sp) across
# the call and incremented after each block.
#
# Only assembler text may appear inside the heredocs: every line is
# emitted verbatim into the generated .s file.
{
my $inp="%r2";
my $out="%r3";
my $len="%r4";
my $key="%r5";	my $iv0="%r5";
my $ivp="%r6";
my $fp ="%r7";

$code.=<<___;
.globl	AES_ctr32_encrypt
.type	AES_ctr32_encrypt,\@function
.align	16
AES_ctr32_encrypt:
	llgfr	$len,$len	# safe in ctr32 subroutine even in 64-bit case
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lctr32_software

	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	slgr	$out,$inp
	la	%r1,0($key)	# %r1 is permanent copy of $key
	lg	$iv0,0($ivp)	# load ivec
	lg	$ivp,8($ivp)

	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16# guarantee at least 256-bytes buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1		# align at page boundary
	slgr	$fp,$s0		# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
	slgr	$fp,$s1		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	$sp,1024($s0)	# alloca
	srlg	$fp,$fp,4	# convert bytes to blocks, minimum 16
	st${g}	$s2,0($sp)	# back-chain
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
	brc	1,.Lctr32_hw_loop	# not zero, no borrow
	algr	$fp,$len	# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)

.Lctr32_hw_loop:
	la	$s2,16($sp)
	lgr	$s3,$fp
.Lctr32_hw_prepare:
	stg	$iv0,0($s2)
	stg	$ivp,8($s2)
	la	$s2,16($s2)
	ahi	$ivp,1		# 32-bit increment, preserves upper half
	brct	$s3,.Lctr32_hw_prepare

	la	$s0,16($sp)	# inp
	sllg	$s1,$fp,4	# len
	la	$s2,16($sp)	# out
	.long	0xb92e00a8	# km %r10,%r8
	brc	1,.-4		# pay attention to "partial completion"

	la	$s2,16($sp)
	lgr	$s3,$fp
	slgr	$s2,$inp
.Lctr32_hw_xor:
	lg	$s0,0($inp)
	lg	$s1,8($inp)
	xg	$s0,0($s2,$inp)
	xg	$s1,8($s2,$inp)
	stg	$s0,0($out,$inp)
	stg	$s1,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lctr32_hw_xor

	slgr	$len,$fp
	brc	1,.Lctr32_hw_loop	# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
	brc	4+1,.Lctr32_hw_loop	# not zero

	l${g}	$s0,0($sp)
	l${g}	$s1,$SIZE_T($sp)
	la	$s2,16($sp)
.Lctr32_hw_zap:
	stg	$s0,0($s2)
	stg	$s0,8($s2)
	la	$s2,16($s2)
	brct	$s1,.Lctr32_hw_zap

	la	$sp,0($s0)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lctr32_software:
___
$code.=<<___;
	stm${g}	$key,$ra,5*$SIZE_T($sp)
	sl${g}r	$out,$inp
	larl	$tbl,AES_Te
	llgf	$t1,12($ivp)

.Lctr32_loop:
	stm${g}	$inp,$len,2*$SIZE_T($sp)
	llgf	$s0,0($ivp)
	llgf	$s1,4($ivp)
	llgf	$s2,8($ivp)
	lgr	$s3,$t1
	st	$t1,16*$SIZE_T($sp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_encrypt

	lm${g}	$inp,$ivp,2*$SIZE_T($sp)
	llgf	$t1,16*$SIZE_T($sp)
	x	$s0,0($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)

	la	$inp,16($inp)
	ahi	$t1,1		# 32-bit increment
	brct	$len,.Lctr32_loop

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
___
}
# Trailer: declare the common symbol OPENSSL_s390xcap_P (16 bytes,
# alignment 8) and embed the module identification string.
$code.=<<___;
.comm	OPENSSL_s390xcap_P,16,8
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace every `...` span in the accumulated text with the value of the
# enclosed Perl expression (used above for computed stack offsets), then
# write the finished assembly to STDOUT.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;

# Closing STDOUT forces the final flush.  A failure here (full disk,
# closed pipe) would leave a truncated .s file behind, so it must not
# pass silently.
close STDOUT or die "error closing STDOUT: $!";