ARMv8 assembly pack: add Cortex performance numbers.

(cherry picked from commit 0f777aeb50)
2014-06-24 08:06:05 +02:00 · 2014-06-24 08:06:05 +02:00 · 5cd8ce42ec
commit 5cd8ce42ec
parent d15f2d98ef
3 changed files with 12 additions and 4 deletions
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@ -20,7 +20,8 @@
 #
 #		PMULL[2]	32-bit NEON(*)
 # Apple A7	1.76		5.62
-# Cortex-A5x	n/a		n/a
+# Cortex-A53	1.45		8.39
+# Cortex-A57	2.22		7.61
 #
 # (*)	presented for reference/comparison purposes;

--- a/crypto/sha/asm/sha1-armv8.pl
+++ b/crypto/sha/asm/sha1-armv8.pl
@ -14,7 +14,8 @@
 #
 #		hardware-assisted	software(*)
 # Apple A7	2.31			4.13 (+14%)
-# Cortex-A5x	n/a			n/a
+# Cortex-A53	2.19			8.73 (+108%)
+# Cortex-A57	2.35			7.88 (+74%)
 #
 # (*)	Software results are presented mostly for reference purposes.

--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@ -14,12 +14,18 @@
 #
 #		SHA256-hw	SHA256(*)	SHA512
 # Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
-# Cortex-A5x	n/a		n/a		n/a
+# Cortex-A53	2.38		15.6 (+110%)	10.1 (+190%(***))
+# Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
 # 
 # (*)	Software SHA256 results are of lesser relevance, presented
 #	mostly for informational purposes.
 # (**)	The result is a trade-off: it's possible to improve it by
-#	10%, but at the cost of 20% loss on Cortex-A5x.
+#	10% (or by 1 cycle per round), but at the cost of a 20% loss
+#	on Cortex-A53 (or 4 cycles per round).
+# (***)	Super-impressive coefficients over gcc-generated code are
+#	an indication of some compiler "pathology"; most notably, code
+#	generated with -mgeneral-regs-only is significantly faster
+#	and lags behind assembly only by 50-90%.

 $flavour=shift;
 $output=shift;