x86 assembly pack: update performance results.

Reviewed-by: Richard Levitte <levitte@openssl.org>
2016-12-17 19:10:00 +01:00 · 2016-12-17 19:10:00 +01:00 · a30b0522cb
commit a30b0522cb
parent f15eed3b79
7 changed files with 13 additions and 0 deletions
--- a/crypto/aes/asm/aesni-x86.pl
+++ b/crypto/aes/asm/aesni-x86.pl
@ -62,7 +62,9 @@
 # Westmere	3.77/1.37	1.37	1.52	1.27
 # * Bridge	5.07/0.98	0.99	1.09	0.91	1.10
 # Haswell	4.44/0.80	0.97	1.03	0.72	0.76
+# Skylake	2.68/0.65	0.65	0.66	0.64	0.66
 # Silvermont	5.77/3.56	3.67	4.03	3.46	4.03
+# Goldmont	3.84/1.39	1.39	1.63	1.31	1.70
 # Bulldozer	5.80/0.98	1.05	1.24	0.93	1.23

 $PREFIX="aesni";	# if $PREFIX is set to "AES", the script
--- a/crypto/chacha/asm/chacha-x86.pl
+++ b/crypto/chacha/asm/chacha-x86.pl
@ -28,6 +28,7 @@
 # Westmere	9.50/+45%	3.35
 # Sandy Bridge	10.5/+47%	3.20
 # Haswell	8.15/+50%	2.83
+# Skylake	7.53/+22%	2.75
 # Silvermont	17.4/+36%	8.35
 # Goldmont	13.4/+40%	4.36
 # Sledgehammer	10.2/+54%
--- a/crypto/chacha/asm/chacha-x86_64.pl
+++ b/crypto/chacha/asm/chacha-x86_64.pl
@ -32,6 +32,7 @@
 # Sandy Bridge	8.31/+42%	5.45/6.76	2.72
 # Ivy Bridge	6.71/+46%	5.40/6.49	2.41
 # Haswell	5.92/+43%	5.20/6.45	2.42	    1.23
+# Skylake	5.87/+39%	4.70/-		2.31	    1.19
 # Silvermont	12.0/+33%	7.75/7.40	7.03(iii)
 # Goldmont	10.6/+17%	5.10/-		3.28
 # Sledgehammer	7.28/+52%	-/14.2(ii)	-
--- a/crypto/poly1305/asm/poly1305-x86.pl
+++ b/crypto/poly1305/asm/poly1305-x86.pl
@ -29,6 +29,7 @@
 # Westmere	4.58/+100%	1.43
 # Sandy Bridge	3.90/+100%	1.36
 # Haswell	3.88/+70%	1.18		0.72
+# Skylake	3.10/+60%	1.14		0.62
 # Silvermont	11.0/+40%	4.80
 # Goldmont	4.10/+200%	2.10
 # VIA Nano	6.71/+90%	2.47
--- a/crypto/sha/asm/sha1-586.pl
+++ b/crypto/sha/asm/sha1-586.pl
@ -104,10 +104,12 @@
 # Sandy Bridge	8.8		6.2/+40%	5.1(**)/+73%
 # Ivy Bridge	7.2		4.8/+51%	4.7(**)/+53%
 # Haswell	6.5		4.3/+51%	4.1(**)/+58%
+# Skylake	6.4		4.1/+55%	4.1(**)/+55%
 # Bulldozer	11.6		6.0/+92%
 # VIA Nano	10.6		7.5/+41%
 # Atom		12.5		9.3(*)/+35%
 # Silvermont	14.5		9.9(*)/+46%
+# Goldmont	8.8		6.7/+30%	1.7(***)/+415%
 #
 # (*)	Loop is 1056 instructions long and expected result is ~8.25.
 #	The discrepancy is because of front-end limitations, so
@ -115,6 +117,8 @@
 #	limited parallelism.
 #
 # (**)	As per above comment, the result is for AVX *plus* sh[rl]d.
+#
+# (***)	SHAEXT result

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@ -57,14 +57,17 @@
 # Sandy Bridge	25	-	15.9		12.4	11.6
 # Ivy Bridge	24	-	15.0		11.4	10.3
 # Haswell	22	-	13.9		9.46	7.80
+# Skylake	20	-	14.9		9.50	7.70
 # Bulldozer	36	-	27/22		17.0	13.6
 # VIA Nano	36	-	25/22		16.8	16.5
 # Atom		50	-	30/25		21.9	18.9
 # Silvermont	40	-	34/31		22.9	20.6
+# Goldmont	29	-	20		16.3(***)
 #
 # (*)	numbers after slash are for unrolled loop, where applicable;
 # (**)	x86_64 assembly performance is presented for reference
 #	purposes, results are best-available;
+# (***)	SHAEXT result is 4.1, strangely enough better than 64-bit one;

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
--- a/crypto/sha/asm/sha512-586.pl
+++ b/crypto/sha/asm/sha512-586.pl
@ -32,6 +32,7 @@
 # Sandy Bridge	58	-	35	11.9	11.2
 # Ivy Bridge	50	-	33	11.5	8.17
 # Haswell	46	-	29	11.3	7.66
+# Skylake	40	-	26	13.3	7.25
 # Bulldozer	121	-	50	14.0	13.5
 # VIA Nano	91	-	52	33	14.7
 # Atom		126	-	68	48(***)	14.7