aesni-sha256-x86_64.pl: fix crash on AMD Jaguar.
It was also found that the stitch performs suboptimally on AMD Jaguar, hence execution is limited to XOP-capable and Intel processors. Reviewed-by: Kurt Roeckx <kurt@openssl.org>
This commit is contained in:
parent
39e46af6bb
commit
a5fd24d19b
2 changed files with 13 additions and 5 deletions
|
@ -140,11 +140,8 @@ $code.=<<___ if ($avx>1);
|
|||
je ${func}_avx2
|
||||
___
|
||||
$code.=<<___;
|
||||
and \$`1<<30`,%eax # mask "Intel CPU" bit
|
||||
and \$`1<<28|1<<9`,%r10d # mask AVX+SSSE3 bits
|
||||
or %eax,%r10d
|
||||
cmp \$`1<<28|1<<9|1<<30`,%r10d
|
||||
je ${func}_avx
|
||||
and \$`1<<28`,%r10d # check for AVX
|
||||
jnz ${func}_avx
|
||||
ud2
|
||||
___
|
||||
}
|
||||
|
|
|
@ -498,7 +498,18 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
|
|||
iv = AES_BLOCK_SIZE;
|
||||
|
||||
# if defined(STITCHED_CALL)
|
||||
/*
|
||||
* Assembly stitch handles AVX-capable processors, but its
|
||||
* performance is not optimal on AMD Jaguar, ~40% worse, for
|
||||
* unknown reasons. Incidentally processor in question supports
|
||||
* AVX, but not AMD-specific XOP extension, which can be used
|
||||
* to identify it and avoid stitch invocation. So after we
|
||||
* establish that the current CPU supports AVX, we also check that it's
|
||||
* either an XOP-capable Bulldozer-based or a GenuineIntel one.
|
||||
*/
|
||||
if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */
|
||||
((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */
|
||||
| (OPENSSL_ia32cap_P[0] & (1<<30))) && /* "Intel CPU"? */
|
||||
plen > (sha_off + iv) &&
|
||||
(blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
|
||||
SHA256_Update(&key->md, in + iv, sha_off);
|
||||
|
|
Loading…
Reference in a new issue