From 1b1ff9b94d5cfa7879ef6a1a4101fe4db9cb9a9c Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 8 Mar 2019 14:40:56 +0100 Subject: [PATCH] sha/asm/keccak1600-ppc64.pl: up to 10% performance improvement. Reviewed-by: Matt Caswell Reviewed-by: Richard Levitte (Merged from https://github.com/openssl/openssl/pull/8444) --- crypto/sha/asm/keccak1600-ppc64.pl | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/crypto/sha/asm/keccak1600-ppc64.pl b/crypto/sha/asm/keccak1600-ppc64.pl index 5c23841d6a..876632b1e7 100755 --- a/crypto/sha/asm/keccak1600-ppc64.pl +++ b/crypto/sha/asm/keccak1600-ppc64.pl @@ -27,10 +27,10 @@ # # r=1088(*) # -# PPC970/G5 14.6/+120% -# POWER7 10.3/+100% -# POWER8 11.5/+85% -# POWER9 9.4/+45% +# PPC970/G5 14.0/+130% +# POWER7 9.7/+110% +# POWER8 10.6/+100% +# POWER9 8.2/+66% # # (*) Corresponds to SHA3-256. Percentage after slash is improvement # over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do @@ -384,19 +384,19 @@ KeccakF1600: .type dword_le_load,\@function .align 5 dword_le_load: - lbzu r0,1(r3) - lbzu r4,1(r3) - lbzu r5,1(r3) + lbz r0,1(r3) + lbz r4,2(r3) + lbz r5,3(r3) insrdi r0,r4,8,48 - lbzu r4,1(r3) + lbz r4,4(r3) insrdi r0,r5,8,40 - lbzu r5,1(r3) + lbz r5,5(r3) insrdi r0,r4,8,32 - lbzu r4,1(r3) + lbz r4,6(r3) insrdi r0,r5,8,24 - lbzu r5,1(r3) + lbz r5,7(r3) insrdi r0,r4,8,16 - lbzu r4,1(r3) + lbzu r4,8(r3) insrdi r0,r5,8,8 insrdi r0,r4,8,0 blr @@ -657,21 +657,21 @@ SHA3_squeeze: ${UCMP}i $len,8 blt .Lsqueeze_tail - stbu r0,1($out) + stb r0,1($out) srdi r0,r0,8 - stbu r0,1($out) + stb r0,2($out) srdi r0,r0,8 - stbu r0,1($out) + stb r0,3($out) srdi r0,r0,8 - stbu r0,1($out) + stb r0,4($out) srdi r0,r0,8 - stbu r0,1($out) + stb r0,5($out) srdi r0,r0,8 - stbu r0,1($out) + stb r0,6($out) srdi r0,r0,8 - stbu r0,1($out) + stb r0,7($out) srdi r0,r0,8 - stbu r0,1($out) + stbu r0,8($out) subic. $len,$len,8 beq .Lsqueeze_done