diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index 304c6b7062..680dd0a78c 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -665,7 +665,7 @@ const char *CRYPTO_get_lock_name(int type) defined(__INTEL__) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) -unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[4]; unsigned long *OPENSSL_ia32cap_loc(void) { if (sizeof(long)==4) /* @@ -674,6 +674,9 @@ unsigned long *OPENSSL_ia32cap_loc(void) * is 32-bit. */ OPENSSL_ia32cap_P[1]=0; + + OPENSSL_ia32cap_P[2]=0; + return (unsigned long *)OPENSSL_ia32cap_P; } @@ -686,7 +689,7 @@ typedef unsigned long long IA32CAP; #endif void OPENSSL_cpuid_setup(void) { static int trigger=0; - IA32CAP OPENSSL_ia32_cpuid(void); + IA32CAP OPENSSL_ia32_cpuid(unsigned int *); IA32CAP vec; char *env; @@ -700,10 +703,21 @@ void OPENSSL_cpuid_setup(void) #else if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0); #endif - if (off) vec = OPENSSL_ia32_cpuid()&~vec; + if (off) vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P)&~vec; + else if (env[0]==':') vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); + + OPENSSL_ia32cap_P[2] = 0; + if ((env=strchr(env,':'))) { + unsigned int vecx; + env++; + off = (env[0]=='~')?1:0; + vecx = strtoul(env+off,NULL,0); + if (off) OPENSSL_ia32cap_P[2] &= ~vecx; + else OPENSSL_ia32cap_P[2] = vecx; + } } else - vec = OPENSSL_ia32_cpuid(); + vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); /* * |(1<<10) sets a reserved bit to signal that variable @@ -713,6 +727,8 @@ void OPENSSL_cpuid_setup(void) OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10); OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); } +#else +unsigned int OPENSSL_ia32cap_P[4]; #endif #else diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index 6ebfd017ea..34118c30ec 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -24,7 +24,7 @@ print<<___; call OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 .text @@ -53,12 +53,13 @@ OPENSSL_rdtsc: .size OPENSSL_rdtsc,.-OPENSSL_rdtsc .globl OPENSSL_ia32_cpuid -.type OPENSSL_ia32_cpuid,\@abi-omnipotent +.type OPENSSL_ia32_cpuid,\@function,1 .align 16 OPENSSL_ia32_cpuid: mov %rbx,%r8 # save %rbx xor %eax,%eax + mov %eax,8(%rdi) # clear 3rd word cpuid mov %eax,%r11d # max value for standard query level @@ -126,6 +127,14 @@ OPENSSL_ia32_cpuid: shr \$14,%r10d and \$0xfff,%r10d # number of cores -1 per L1D + cmp \$7,%r11d + jb .Lnocacheinfo + + mov \$7,%eax + xor %ecx,%ecx + cpuid + mov %ebx,8(%rdi) + .Lnocacheinfo: mov \$1,%eax cpuid @@ -165,6 +174,7 @@ OPENSSL_ia32_cpuid: .Lclear_avx: mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) and %eax,%r9d # clear AVX, FMA and AMD XOP bits + andl \$0xffffffdf,8(%rdi) # cleax AVX2, ~(1<<5) .Ldone: shl \$32,%r9 mov %r10d,%eax diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index 0212a5b63a..390c883d2b 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -22,6 +22,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &xor ("eax","eax"); &bt ("ecx",21); &jnc (&label("nocpuid")); + &mov ("esi",&wparam(0)); + &mov (&DWP(8,"esi"),"eax"); # clear 3rd word &cpuid (); &mov ("edi","eax"); # max value for standard query level @@ -79,6 +81,16 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &jmp (&label("generic")); &set_label("intel"); + &cmp ("edi",7); + &jb (&label("cacheinfo")); + + &mov ("esi",&wparam(0)); + &mov ("eax",7); + &xor ("ecx","ecx"); + &cpuid (); + &mov (&DWP(8,"esi"),"ebx"); + +&set_label("cacheinfo"); &cmp ("edi",4); &mov ("edi",-1); &jb (&label("nocacheinfo")); @@ -135,6 +147,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &and ("esi",0xfeffffff); # clear FXSR &set_label("clear_avx"); &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits + &mov ("edi",&wparam(0)); + &and (&DWP(8,"edi"),0xffffffdf); # clear AVX2 &set_label("done"); &mov ("eax","esi"); &mov ("edx","ebp"); @@ -198,7 +212,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &function_begin_B("OPENSSL_far_spin"); &pushf (); - &pop ("eax") + &pop ("eax"); &bt ("eax",9); &jnc (&label("nospin")); # interrupts are disabled