x86_64 assembly pack: add Goldmont performance results.
Reviewed-by: Richard Levitte <levitte@openssl.org>
This commit is contained in:
parent
c3086f4630
commit
ace05265d2
11 changed files with 14 additions and 1 deletion
|
@ -179,6 +179,7 @@
|
|||
# Haswell 4.44/0.63 0.63 0.73 0.63 0.70
|
||||
# Skylake 2.62/0.63 0.63 0.63 0.63
|
||||
# Silvermont 5.75/3.54 3.56 4.12 3.87(*) 4.11
|
||||
# Goldmont 3.82/1.26 1.26 1.29 1.29 1.50
|
||||
# Bulldozer 5.77/0.70 0.72 0.90 0.70 0.95
|
||||
#
|
||||
# (*) Atom Silvermont ECB result is suboptimal because of penalties
|
||||
|
|
|
@ -48,6 +48,7 @@
|
|||
# Nehalem(**) 7.63 6.88 +11%
|
||||
# Atom 17.1 16.4 +4%
|
||||
# Silvermont - 12.9
|
||||
# Goldmont - 8.85
|
||||
#
|
||||
# (*) Comparison is not completely fair, because "this" is ECB,
|
||||
# i.e. no extra processing such as counter values calculation
|
||||
|
@ -87,6 +88,7 @@
|
|||
# Nehalem 7.80
|
||||
# Atom 17.9
|
||||
# Silvermont 14.0
|
||||
# Goldmont 10.2
|
||||
#
|
||||
# November 2011.
|
||||
#
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
# Nehalem 29.6/40.3/14.6 10.0/11.8
|
||||
# Atom 57.3/74.2/32.1 60.9/77.2(***)
|
||||
# Silvermont 52.7/64.0/19.5 48.8/60.8(***)
|
||||
# Goldmont 38.9/49.0/17.8 10.6/12.6
|
||||
#
|
||||
# (*) "Hyper-threading" in this context refers to cache shared among
|
||||
# multiple cores rather than specifically to Intel HTT. As vast
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
# Sandy Bridge 10.5/+47% 3.20
|
||||
# Haswell 8.15/+50% 2.83
|
||||
# Silvermont 17.4/+36% 8.35
|
||||
# Goldmont 13.4/+40% 4.36
|
||||
# Sledgehammer 10.2/+54%
|
||||
# Bulldozer 13.4/+50% 4.38(*)
|
||||
#
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
# Ivy Bridge 6.71/+46% 5.40/6.49 2.41
|
||||
# Haswell 5.92/+43% 5.20/6.45 2.42 1.23
|
||||
# Silvermont 12.0/+33% 7.75/7.40 7.03(iii)
|
||||
# Goldmont 10.6/+17% 5.10/- 3.28
|
||||
# Sledgehammer 7.28/+52% -/14.2(ii) -
|
||||
# Bulldozer 9.66/+28% 9.85/11.1 3.06(iv)
|
||||
# VIA Nano 10.5/+46% 6.72/8.60 6.05
|
||||
|
|
|
@ -74,6 +74,7 @@
|
|||
# Skylake 0.44(+110%)(if system doesn't support AVX)
|
||||
# Bulldozer 1.49(+27%)
|
||||
# Silvermont 2.88(+13%)
|
||||
# Goldmont 1.08(+24%)
|
||||
|
||||
# March 2013
|
||||
#
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
# Sandy Bridge 3.90/+100% 1.36
|
||||
# Haswell 3.88/+70% 1.18 0.72
|
||||
# Silvermont 11.0/+40% 4.80
|
||||
# Goldmont 4.10/+200% 2.10
|
||||
# VIA Nano 6.71/+90% 2.47
|
||||
# Sledgehammer 3.51/+180% 4.27
|
||||
# Bulldozer 4.53/+140% 1.31
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
# Haswell 1.14/+175% 1.11 0.65
|
||||
# Skylake 1.13/+120% 0.96 0.51
|
||||
# Silvermont 2.83/+95% -
|
||||
# Goldmont 1.70/+180% -
|
||||
# VIA Nano 1.82/+150% -
|
||||
# Sledgehammer 1.38/+160% -
|
||||
# Bulldozer 2.30/+130% 0.97
|
||||
|
|
|
@ -85,9 +85,11 @@
|
|||
# VIA Nano 9.32 7.15/+30%
|
||||
# Atom 10.3 9.17/+12%
|
||||
# Silvermont 13.1(*) 9.37/+40%
|
||||
# Goldmont 8.13 6.42/+27% 1.70/+380%(**)
|
||||
#
|
||||
# (*) obviously suboptimal result, nothing was done about it,
|
||||
# because SSSE3 code is compiled unconditionally;
|
||||
# (**) SHAEXT result
|
||||
|
||||
$flavour = shift;
|
||||
$output = shift;
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
# VIA Nano 91 - 52 33 14.7
|
||||
# Atom 126 - 68 48(***) 14.7
|
||||
# Silvermont 97 - 58 42(***) 17.5
|
||||
# Goldmont 80 - 48 19.5 12.0
|
||||
#
|
||||
# (*) whichever best applicable.
|
||||
# (**) x86_64 assembler performance is presented for reference
|
||||
|
|
|
@ -98,8 +98,9 @@
|
|||
# VIA Nano 23.0 16.5(+39%) - 14.7 -
|
||||
# Atom 23.0 18.9(+22%) - 14.7 -
|
||||
# Silvermont 27.4 20.6(+33%) - 17.5 -
|
||||
# Goldmont 18.9 14.3(+32%) 4.16(+350%) 12.0 -
|
||||
#
|
||||
# (*) whichever best applicable;
|
||||
# (*) whichever best applicable, including SHAEXT;
|
||||
# (**) the switch from ror to shrd accounts for a fair share of the improvement;
|
||||
# (***) execution time is fully determined by remaining integer-only
|
||||
# part, body_00_15; reducing the amount of SIMD instructions
|
||||
|
|
Loading…
Reference in a new issue