x86 assembly pack: update performance results.
Reviewed-by: Richard Levitte <levitte@openssl.org>
This commit is contained in:
parent
f15eed3b79
commit
a30b0522cb
7 changed files with 13 additions and 0 deletions
|
@ -62,7 +62,9 @@
|
|||
# Westmere 3.77/1.37 1.37 1.52 1.27
|
||||
# * Bridge 5.07/0.98 0.99 1.09 0.91 1.10
|
||||
# Haswell 4.44/0.80 0.97 1.03 0.72 0.76
|
||||
# Skylake 2.68/0.65 0.65 0.66 0.64 0.66
|
||||
# Silvermont 5.77/3.56 3.67 4.03 3.46 4.03
|
||||
# Goldmont 3.84/1.39 1.39 1.63 1.31 1.70
|
||||
# Bulldozer 5.80/0.98 1.05 1.24 0.93 1.23
|
||||
|
||||
$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
# Westmere 9.50/+45% 3.35
|
||||
# Sandy Bridge 10.5/+47% 3.20
|
||||
# Haswell 8.15/+50% 2.83
|
||||
# Skylake 7.53/+22% 2.75
|
||||
# Silvermont 17.4/+36% 8.35
|
||||
# Goldmont 13.4/+40% 4.36
|
||||
# Sledgehammer 10.2/+54%
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
# Sandy Bridge 8.31/+42% 5.45/6.76 2.72
|
||||
# Ivy Bridge 6.71/+46% 5.40/6.49 2.41
|
||||
# Haswell 5.92/+43% 5.20/6.45 2.42 1.23
|
||||
# Skylake 5.87/+39% 4.70/- 2.31 1.19
|
||||
# Silvermont 12.0/+33% 7.75/7.40 7.03(iii)
|
||||
# Goldmont 10.6/+17% 5.10/- 3.28
|
||||
# Sledgehammer 7.28/+52% -/14.2(ii) -
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
# Westmere 4.58/+100% 1.43
|
||||
# Sandy Bridge 3.90/+100% 1.36
|
||||
# Haswell 3.88/+70% 1.18 0.72
|
||||
# Skylake 3.10/+60% 1.14 0.62
|
||||
# Silvermont 11.0/+40% 4.80
|
||||
# Goldmont 4.10/+200% 2.10
|
||||
# VIA Nano 6.71/+90% 2.47
|
||||
|
|
|
@ -104,10 +104,12 @@
|
|||
# Sandy Bridge 8.8 6.2/+40% 5.1(**)/+73%
|
||||
# Ivy Bridge 7.2 4.8/+51% 4.7(**)/+53%
|
||||
# Haswell 6.5 4.3/+51% 4.1(**)/+58%
|
||||
# Skylake 6.4 4.1/+55% 4.1(**)/+55%
|
||||
# Bulldozer 11.6 6.0/+92%
|
||||
# VIA Nano 10.6 7.5/+41%
|
||||
# Atom 12.5 9.3(*)/+35%
|
||||
# Silvermont 14.5 9.9(*)/+46%
|
||||
# Goldmont 8.8 6.7/+30% 1.7(***)/+415%
|
||||
#
|
||||
# (*) Loop is 1056 instructions long and the expected result is ~8.25.
|
||||
# The discrepancy is because of front-end limitations, so
|
||||
|
@ -115,6 +117,8 @@
|
|||
# limited parallelism.
|
||||
#
|
||||
# (**) As per the above comment, the result is for AVX *plus* sh[rl]d.
|
||||
#
|
||||
# (***) SHAEXT result
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
|
|
|
@ -57,14 +57,17 @@
|
|||
# Sandy Bridge 25 - 15.9 12.4 11.6
|
||||
# Ivy Bridge 24 - 15.0 11.4 10.3
|
||||
# Haswell 22 - 13.9 9.46 7.80
|
||||
# Skylake 20 - 14.9 9.50 7.70
|
||||
# Bulldozer 36 - 27/22 17.0 13.6
|
||||
# VIA Nano 36 - 25/22 16.8 16.5
|
||||
# Atom 50 - 30/25 21.9 18.9
|
||||
# Silvermont 40 - 34/31 22.9 20.6
|
||||
# Goldmont 29 - 20 16.3(***)
|
||||
#
|
||||
# (*) numbers after the slash are for the unrolled loop, where applicable;
|
||||
# (**) x86_64 assembly performance is presented for reference
|
||||
# purposes, results are best-available;
|
||||
# (***) SHAEXT result is 4.1, strangely enough better than the 64-bit one;
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
# Sandy Bridge 58 - 35 11.9 11.2
|
||||
# Ivy Bridge 50 - 33 11.5 8.17
|
||||
# Haswell 46 - 29 11.3 7.66
|
||||
# Skylake 40 - 26 13.3 7.25
|
||||
# Bulldozer 121 - 50 14.0 13.5
|
||||
# VIA Nano 91 - 52 33 14.7
|
||||
# Atom 126 - 68 48(***) 14.7
|
||||
|
|
Loading…
Reference in a new issue