sha/asm/keccak1600-ppc64.pl: up to 10% performance improvement.

Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/8444)
This commit is contained in:
Andy Polyakov 2019-03-08 14:40:56 +01:00 committed by Richard Levitte
parent 3dcbb6c4a3
commit 1b1ff9b94d

View file

@@ -27,10 +27,10 @@
#
# r=1088(*)
#
-# PPC970/G5 14.6/+120%
-# POWER7 10.3/+100%
-# POWER8 11.5/+85%
-# POWER9 9.4/+45%
+# PPC970/G5 14.0/+130%
+# POWER7 9.7/+110%
+# POWER8 10.6/+100%
+# POWER9 8.2/+66%
#
# (*) Corresponds to SHA3-256. Percentage after slash is improvement
# over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
@@ -384,19 +384,19 @@ KeccakF1600:
# dword_le_load: build one 64-bit value in r0 from eight single-byte loads
# taken at r3+1 .. r3+8, lowest-addressed byte ending up least significant.
# r4 and r5 alternate as scratch for the byte about to be inserted.
#
# NOTE(review): this hunk appears to be a unified diff whose +/- markers
# were lost (its header declares 19 lines on each side), so the replaced
# and the replacement instructions sit back to back. Lines tagged [old]
# use lbzu with displacement 1, which rewrites r3 on every load; lines
# tagged [new] use lbz with displacements 1..7 plus one final lbzu at
# displacement 8, so r3 is rewritten only once. Untagged lines are common
# to both versions. Confirm the tagging against the upstream commit.
.type dword_le_load,\@function
.align 5
dword_le_load:
lbzu r0,1(r3) # [old] r0 = first byte (zero-extended), r3 += 1
lbzu r4,1(r3) # [old] second byte, r3 += 1
lbzu r5,1(r3) # [old] third byte, r3 += 1
lbz r0,1(r3) # [new] r0 = byte at r3+1 (zero-extended), r3 unchanged
lbz r4,2(r3) # [new] byte at r3+2
lbz r5,3(r3) # [new] byte at r3+3
insrdi r0,r4,8,48 # second byte -> r0 bits 8..15 (counting from LSB)
lbzu r4,1(r3) # [old] fourth byte, r3 += 1
lbz r4,4(r3) # [new] byte at r3+4
insrdi r0,r5,8,40 # third byte -> r0 bits 16..23
lbzu r5,1(r3) # [old] fifth byte, r3 += 1
lbz r5,5(r3) # [new] byte at r3+5
insrdi r0,r4,8,32 # fourth byte -> r0 bits 24..31
lbzu r4,1(r3) # [old] sixth byte, r3 += 1
lbz r4,6(r3) # [new] byte at r3+6
insrdi r0,r5,8,24 # fifth byte -> r0 bits 32..39
lbzu r5,1(r3) # [old] seventh byte, r3 += 1
lbz r5,7(r3) # [new] byte at r3+7
insrdi r0,r4,8,16 # sixth byte -> r0 bits 40..47
lbzu r4,1(r3) # [old] eighth byte, r3 += 1
lbzu r4,8(r3) # [new] byte at r3+8, and the only update: r3 += 8
insrdi r0,r5,8,8 # seventh byte -> r0 bits 48..55
insrdi r0,r4,8,0 # eighth byte -> r0 bits 56..63 (most significant)
blr # return with the assembled value in r0
@@ -657,21 +657,21 @@ SHA3_squeeze:
${UCMP}i $len,8
blt .Lsqueeze_tail
stbu r0,1($out)
stb r0,1($out)
srdi r0,r0,8
stbu r0,1($out)
stb r0,2($out)
srdi r0,r0,8
stbu r0,1($out)
stb r0,3($out)
srdi r0,r0,8
stbu r0,1($out)
stb r0,4($out)
srdi r0,r0,8
stbu r0,1($out)
stb r0,5($out)
srdi r0,r0,8
stbu r0,1($out)
stb r0,6($out)
srdi r0,r0,8
stbu r0,1($out)
stb r0,7($out)
srdi r0,r0,8
stbu r0,1($out)
stbu r0,8($out)
subic. $len,$len,8
beq .Lsqueeze_done