crypto/x86_64cpuid.pl: suppress AVX512F flag on Skylake-X.
It was observed that AVX512 code paths can negatively affect overall Skylake-X system performance. However, the concern applies specifically to 512-bit code; AVX512VL, the 256-bit variant of the AVX512F instructions, is expected to run as smoothly as AVX2, which is why it remains unmasked. Reviewed-by: Rich Salz <rsalz@openssl.org> (Merged from https://github.com/openssl/openssl/pull/4838)
This commit is contained in:
parent
05de3a5be9
commit
7933762870
1 changed files with 8 additions and 0 deletions
|
@@ -139,6 +139,7 @@ OPENSSL_ia32_cpuid:
|
|||
.Lnocacheinfo:
|
||||
mov \$1,%eax
|
||||
cpuid
|
||||
movd %eax,%xmm0 # put aside processor id
|
||||
and \$0xbfefffff,%edx # force reserved bits to 0
|
||||
cmp \$0,%r9d
|
||||
jne .Lnotintel
|
||||
|
@@ -186,6 +187,13 @@ OPENSSL_ia32_cpuid:
|
|||
jc .Lnotknights
|
||||
and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag
|
||||
.Lnotknights:
|
||||
movd %xmm0,%eax # restore processor id
|
||||
and \$0x0fff0ff0,%eax
|
||||
cmp \$0x00050650,%eax # Skylake-X
|
||||
jne .Lnotskylakex
|
||||
and \$0xfffeffff,%ebx # ~(1<<16)
|
||||
# suppress AVX512F flag on Skylake-X
|
||||
.Lnotskylakex:
|
||||
mov %ebx,8(%rdi) # save extended feature flags
|
||||
mov %ecx,12(%rdi)
|
||||
.Lno_extended_info:
|
||||
|
|
Loading…
Reference in a new issue