Extend OPENSSL_ia32cap_P with extra word to accommodate AVX2 capability.
This commit is contained in:
parent
b3aee265c5
commit
c5cd28bd64
8 changed files with 88 additions and 11 deletions
|
@ -125,7 +125,7 @@ static double SSLeay_MSVC5_hack=0.0; /* and for VC1.5 */
|
|||
defined(__INTEL__) || \
|
||||
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
|
||||
|
||||
extern unsigned int OPENSSL_ia32cap_P[2];
|
||||
extern unsigned int OPENSSL_ia32cap_P[4];
|
||||
unsigned int *OPENSSL_ia32cap_loc(void) { return OPENSSL_ia32cap_P; }
|
||||
|
||||
#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
|
||||
|
@ -137,7 +137,7 @@ typedef unsigned long long IA32CAP;
|
|||
#endif
|
||||
void OPENSSL_cpuid_setup(void)
|
||||
{ static int trigger=0;
|
||||
IA32CAP OPENSSL_ia32_cpuid(void);
|
||||
IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
|
||||
IA32CAP vec;
|
||||
char *env;
|
||||
|
||||
|
@ -151,10 +151,18 @@ void OPENSSL_cpuid_setup(void)
|
|||
#else
|
||||
if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0);
|
||||
#endif
|
||||
if (off) vec = OPENSSL_ia32_cpuid()&~vec;
|
||||
if (off) vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P)&~vec;
|
||||
|
||||
OPENSSL_ia32cap_P[2] = 0;
|
||||
if ((env=strchr(env,':'))) {
|
||||
off = (env[1]=='~')?2:1;
|
||||
vec = strtoul(env+off,NULL,0);
|
||||
if (off>1) OPENSSL_ia32cap_P[2] &= ~vec;
|
||||
else OPENSSL_ia32cap_P[2] = vec;
|
||||
}
|
||||
}
|
||||
else
|
||||
vec = OPENSSL_ia32_cpuid();
|
||||
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
|
||||
|
||||
/*
|
||||
* |(1<<10) sets a reserved bit to signal that variable
|
||||
|
@ -165,7 +173,7 @@ void OPENSSL_cpuid_setup(void)
|
|||
OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32);
|
||||
}
|
||||
#else
|
||||
unsigned int OPENSSL_ia32cap_P[2];
|
||||
unsigned int OPENSSL_ia32cap_P[4];
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
@ -173,7 +181,7 @@ unsigned int *OPENSSL_ia32cap_loc(void) { return NULL; }
|
|||
#endif
|
||||
int OPENSSL_NONPIC_relocated = 0;
|
||||
#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ)
|
||||
void OPENSSL_cpuid_setup(void) {}
|
||||
/*
 * No-op fallback compiled when neither OPENSSL_CPUID_SETUP nor
 * OPENSSL_CPUID_OBJ is defined. It takes no arguments, matching the
 * void OPENSSL_cpuid_setup(void) definition used when a CPUID object
 * is available, so both variants share one signature.
 */
void OPENSSL_cpuid_setup(void) {}
|
||||
#endif
|
||||
|
||||
#if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_WINDLL)
|
||||
|
|
|
@ -131,6 +131,32 @@ sub ::rdrand
|
|||
{ &::generic("rdrand",@_); }
|
||||
}
|
||||
|
||||
# rxb - append the prefix byte carrying the three inverted register
# extension bits (presumably R, X and B, going by the name) to an opcode
# sequence under construction.
#
# Arguments:
#   $opcode - reference to the array of opcode bytes; the computed byte
#             is pushed onto it in place
#   $dst    - register number; 8 or above clears bit 7 (0x04<<5)
#   $src1   - register number; 8 or above clears bit 5 (0x01<<5)
#   $src2   - register number; 8 or above clears bit 6 (0x02<<5);
#             pass -1 when the instruction has no such operand
#   $rxb    - low five bits of the byte, treated as 0 when omitted
#
# Returns whatever push returns, i.e. the new length of the opcode array.
#
# The array is reached through a lexical reference rather than by
# aliasing a package typeglob, so the sub touches no package globals and
# also compiles under "use strict".
sub rxb {
    my ($opcode,$dst,$src1,$src2,$rxb)=@_;

    $rxb=0 unless defined($rxb);

    # The three top bits are stored inverted: start with all of them
    # set and clear one for every register numbered 8 or above.
    $rxb|=0x7<<5;
    $rxb&=~(0x04<<5) if($dst>=8);
    $rxb&=~(0x01<<5) if($src1>=8);
    $rxb&=~(0x02<<5) if($src2>=8);

    push @$opcode,$rxb;
}
|
||||
|
||||
# ::vprotd - emit the vprotd instruction.
#
# When the operands are two registers out of xmm0..xmm7 followed by an
# immediate, the instruction is encoded by hand and emitted as raw bytes
# through ::data_byte: the 0x8f escape, the byte produced by rxb(),
# 0x78, the 0xc2 opcode, a register-to-register ModR/M byte and the
# immediate. Every other operand form is passed on to ::generic.
sub ::vprotd
{   my $operands=join(',',@_);

    # Anything but the xmm,xmm,imm form is left to the assembler.
    unless ($operands =~ /xmm([0-7]),xmm([0-7]),([x0-9a-f]+)/)
    {   return &::generic("vprotd",@_);   }

    my ($dst,$src,$imm)=($1,$2,$3);

    my @opcode=(0x8f);
    rxb(\@opcode,$dst,$src,-1,0x08);
    push @opcode,0x78,0xc2;
    push @opcode,0xc0|($src&7)|(($dst&7)<<3);   # ModR/M
    # Immediates with a leading 0 (0x.., 0..) go through oct().
    push @opcode,($imm=~/^0/)?oct($imm):$imm;

    &::data_byte(@opcode);
}
|
||||
|
||||
# label management
|
||||
$lbdecor="L"; # local label decoration, set by package
|
||||
$label="000";
|
||||
|
|
|
@ -70,6 +70,8 @@ sub ::DWP
|
|||
{ my($addr,$reg1,$reg2,$idx)=@_;
|
||||
my $ret="";
|
||||
|
||||
if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
|
||||
|
||||
$addr =~ s/^\s+//;
|
||||
# prepend global references with optional underscore
|
||||
$addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige;
|
||||
|
@ -157,7 +159,7 @@ sub ::file_end
|
|||
}
|
||||
}
|
||||
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
|
||||
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8";
|
||||
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
|
||||
if ($::macosx) { push (@out,"$tmp,2\n"); }
|
||||
elsif ($::elf) { push (@out,"$tmp,4\n"); }
|
||||
else { push (@out,"$tmp\n"); }
|
||||
|
|
|
@ -39,6 +39,8 @@ sub get_mem
|
|||
{ my($size,$addr,$reg1,$reg2,$idx)=@_;
|
||||
my($post,$ret);
|
||||
|
||||
if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
|
||||
|
||||
$ret .= "$size PTR " if ($size ne "");
|
||||
|
||||
$addr =~ s/^\s+//;
|
||||
|
@ -133,7 +135,7 @@ ___
|
|||
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
|
||||
{ my $comm=<<___;
|
||||
.bss SEGMENT 'BSS'
|
||||
COMM ${nmdecor}OPENSSL_ia32cap_P:QWORD
|
||||
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
|
||||
.bss ENDS
|
||||
___
|
||||
# comment out OPENSSL_ia32cap_P declarations
|
||||
|
|
|
@ -36,6 +36,8 @@ sub get_mem
|
|||
{ my($size,$addr,$reg1,$reg2,$idx)=@_;
|
||||
my($post,$ret);
|
||||
|
||||
if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
|
||||
|
||||
if ($size ne "")
|
||||
{ $ret .= "$size";
|
||||
$ret .= " PTR" if ($::mwerks);
|
||||
|
@ -117,7 +119,7 @@ sub ::file_end
|
|||
{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
|
||||
{ my $comm=<<___;
|
||||
${drdecor}segment .bss
|
||||
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 8
|
||||
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16
|
||||
___
|
||||
# comment out OPENSSL_ia32cap_P declarations
|
||||
grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
|
||||
|
|
|
@ -23,7 +23,7 @@ print<<___;
|
|||
call OPENSSL_cpuid_setup
|
||||
|
||||
.hidden OPENSSL_ia32cap_P
|
||||
.comm OPENSSL_ia32cap_P,8,4
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.text
|
||||
|
||||
|
@ -52,12 +52,13 @@ OPENSSL_rdtsc:
|
|||
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
|
||||
|
||||
.globl OPENSSL_ia32_cpuid
|
||||
.type OPENSSL_ia32_cpuid,\@abi-omnipotent
|
||||
.type OPENSSL_ia32_cpuid,\@function,1
|
||||
.align 16
|
||||
OPENSSL_ia32_cpuid:
|
||||
mov %rbx,%r8 # save %rbx
|
||||
|
||||
xor %eax,%eax
|
||||
mov %eax,8(%rdi) # clear 3rd word
|
||||
cpuid
|
||||
mov %eax,%r11d # max value for standard query level
|
||||
|
||||
|
@ -125,6 +126,14 @@ OPENSSL_ia32_cpuid:
|
|||
shr \$14,%r10d
|
||||
and \$0xfff,%r10d # number of cores -1 per L1D
|
||||
|
||||
cmp \$7,%r11d
|
||||
jb .Lnocacheinfo
|
||||
|
||||
mov \$7,%eax
|
||||
xor %ecx,%ecx
|
||||
cpuid
|
||||
mov %ebx,8(%rdi)
|
||||
|
||||
.Lnocacheinfo:
|
||||
mov \$1,%eax
|
||||
cpuid
|
||||
|
@ -164,6 +173,7 @@ OPENSSL_ia32_cpuid:
|
|||
.Lclear_avx:
|
||||
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
|
||||
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
|
||||
andl \$0xffffffdf,8(%rdi) # clear AVX2, ~(1<<5)
|
||||
.Ldone:
|
||||
shl \$32,%r9
|
||||
mov %r10d,%eax
|
||||
|
|
|
@ -22,6 +22,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||
&xor ("eax","eax");
|
||||
&bt ("ecx",21);
|
||||
&jnc (&label("nocpuid"));
|
||||
&mov ("esi",&wparam(0));
|
||||
&mov (&DWP(8,"esi"),"eax"); # clear 3rd word
|
||||
&cpuid ();
|
||||
&mov ("edi","eax"); # max value for standard query level
|
||||
|
||||
|
@ -89,6 +91,15 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||
&shr ("edi",14);
|
||||
&and ("edi",0xfff); # number of cores -1 per L1D
|
||||
|
||||
&cmp ("edi",7);
|
||||
&jb (&label("nocacheinfo"));
|
||||
|
||||
&mov ("esi",&wparam(0));
|
||||
&mov ("eax",7);
|
||||
&xor ("ecx","ecx");
|
||||
&cpuid ();
|
||||
&mov (&DWP(8,"esi"),"ebx");
|
||||
|
||||
&set_label("nocacheinfo");
|
||||
&mov ("eax",1);
|
||||
&cpuid ();
|
||||
|
@ -133,6 +144,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||
&and ("esi",0xfeffffff); # clear FXSR
|
||||
&set_label("clear_avx");
|
||||
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
|
||||
&mov ("edi",&wparam(0));
|
||||
&and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
|
||||
&set_label("done");
|
||||
&mov ("eax","esi");
|
||||
&mov ("edx","ebp");
|
||||
|
|
|
@ -72,3 +72,17 @@ the data cache is actually shared between logical cores. This in turn
|
|||
affects the decision on whether or not expensive countermeasures
|
||||
against cache-timing attacks are applied, most notably in AES assembler
|
||||
module.
|
||||
|
||||
The vector is further extended with the EBX value returned by CPUID with
|
||||
EAX=7 and ECX=0 as input. The following bits are significant:
|
||||
|
||||
=item bit #64+3 denoting availability of BMI1 instructions, e.g. ANDN;
|
||||
|
||||
=item bit #64+5 denoting availability of AVX2 instructions;
|
||||
|
||||
=item bit #64+8 denoting availability of BMI2 instructions, e.g. MULX
|
||||
and RORX;
|
||||
|
||||
=item bit #64+18 denoting availability of RDSEED instruction;
|
||||
|
||||
=item bit #64+19 denoting availability of ADCX and ADOX instructions;
|
||||
|
|
Loading…
Reference in a new issue