Multiple assembler packs: add experimental memory bus instrumentation.

This commit is contained in:
Andy Polyakov 2011-04-17 12:46:00 +00:00
parent 764ef43962
commit 5fabb88a78
10 changed files with 724 additions and 12 deletions

View file

@ -126,3 +126,93 @@ OPENSSL_cleanse:
.Ldone: ret ($26)
.end OPENSSL_cleanse
___
{
# Register names substituted into the Alpha assembler emitted below.
# $out is used as the address of the current 32-bit vector element,
# $cnt and $max as counters, $tick/$lasttick/$diff/$lastdiff as scratch
# for cycle-counter readings and their differences. $v0 receives the
# return value and the routines return through $ra.
my ($out,$cnt,$max)=("\$16","\$17","\$18");
my ($tick,$lasttick)=("\$19","\$20");
my ($diff,$lastdiff)=("\$21","\$22");
my ($v0,$ra,$sp,$zero)=("\$0","\$26","\$30","\$31");
# OPENSSL_instrument_bus:
#   Saves $cnt in $v0 up front and returns that value unchanged.
#   One "probe" is: ecb on the current element, ldl_l the word at $out,
#   add $diff to it, stl_c the sum back, then store the same sum once
#   more with a plain stl (the sum is copied to $diff before stl_c).
#   The probe before .Loop adds a $diff of 0; inside .Loop $diff is the
#   rpcc reading minus the previous reading. Each loop pass advances
#   $out by 4 and decrements $cnt until it reaches zero.
#
# OPENSSL_instrument_bus2:
#   Uses the same probe sequence, but $out only advances (and $cnt only
#   decrements) when the new difference is not equal to the previous
#   one: subq/cmovne turn "lastdiff-diff is non-zero" into 1, which is
#   subtracted from $cnt and scaled by 4 into $out. $max is decremented
#   after every probe and the routine leaves the loop when it hits zero.
#   The value returned in $v0 is the initial $cnt minus what is left.
print <<___;
.globl OPENSSL_instrument_bus
.ent OPENSSL_instrument_bus
OPENSSL_instrument_bus:
.frame $sp,0,$ra
.prologue 0
mov $cnt,$v0
rpcc $lasttick
mov 0,$diff
ecb ($out)
ldl_l $tick,0($out)
addl $diff,$tick,$tick
mov $tick,$diff
stl_c $tick,0($out)
stl $diff,0($out)
.Loop: rpcc $tick
subq $tick,$lasttick,$diff
mov $tick,$lasttick
ecb ($out)
ldl_l $tick,0($out)
addl $diff,$tick,$tick
mov $tick,$diff
stl_c $tick,0($out)
stl $diff,0($out)
subl $cnt,1,$cnt
lda $out,4($out)
bne $cnt,.Loop
ret ($ra)
.end OPENSSL_instrument_bus
.globl OPENSSL_instrument_bus2
.ent OPENSSL_instrument_bus2
OPENSSL_instrument_bus2:
.frame $sp,0,$ra
.prologue 0
mov $cnt,$v0
rpcc $lasttick
mov 0,$diff
ecb ($out)
ldl_l $tick,0($out)
addl $diff,$tick,$tick
mov $tick,$diff
stl_c $tick,0($out)
stl $diff,0($out)
rpcc $tick
subq $tick,$lasttick,$diff
mov $tick,$lasttick
mov $diff,$lastdiff
.Loop2:
ecb ($out)
ldl_l $tick,0($out)
addl $diff,$tick,$tick
mov $tick,$diff
stl_c $tick,0($out)
stl $diff,0($out)
subl $max,1,$max
beq $max,.Ldone2
rpcc $tick
subq $tick,$lasttick,$diff
mov $tick,$lasttick
subq $lastdiff,$diff,$tick
mov $diff,$lastdiff
cmovne $tick,1,$tick
subl $cnt,$tick,$cnt
s4addq $tick,$out,$out
bne $cnt,.Loop2
.Ldone2:
subl $v0,$cnt,$v0
ret ($ra)
.end OPENSSL_instrument_bus2
___
}

View file

@ -26,7 +26,7 @@ OPENSSL_atomic_add:
{ .mii; mov ar.ccv=r2
add r8=r2,r33
mov r3=r2 };;
{ .mmi; mf
{ .mmi; mf;;
cmpxchg4.acq r2=[r32],r8,ar.ccv
nop.i 0 };;
{ .mib; cmp.ne p6,p0=r2,r3
@ -165,3 +165,89 @@ OPENSSL_cleanse:
(p7) br.cond.dpnt .Little
(p6) br.ret.sptk.many b0 };;
.endp OPENSSL_cleanse#
// OPENSSL_instrument_bus: r32 is used as the address of the current
// 32-bit vector element and r33 as the probe count. Each probe flushes
// the element (fc), loads it, adds the ar.itc delta since the previous
// probe and writes the sum back with cmpxchg4.acq. r8 on return is the
// initial count minus whatever is left in r33.
// The entry label must carry the same name as the .global/.proc/.endp
// directives, otherwise the exported symbol is left undefined.
.global OPENSSL_instrument_bus#
.proc OPENSSL_instrument_bus#
OPENSSL_instrument_bus:
{ .mmi; mov r2=r33 // put aside cnt
#if defined(_HPUX_SOURCE) && !defined(_LP64)
addp4 r32=0,r32
#endif
}
{ .mmi; mov r8=ar.itc;;
mov r10=r0 // diff=0 for the initial probe
mov r9=r8 };; // lasttick=tick
{ .mmi; fc r32;;
ld4 r8=[r32] };;
{ .mmi; mf
mov ar.ccv=r8
add r8=r8,r10 };;
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
};;
.Loop:
{ .mmi; mov r8=ar.itc;;
sub r10=r8,r9 // diff=tick-lasttick
mov r9=r8 };; // lasttick=tick
{ .mmi; fc r32;;
ld4 r8=[r32] };;
{ .mmi; mf
mov ar.ccv=r8
add r8=r8,r10 };;
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
add r33=-1,r33 // --cnt
add r32=4,r32 };; // ++out
{ .mib; cmp4.ne p6,p0=0,r33
(p6) br.cond.dptk .Loop };;
{ .mib; sub r8=r2,r33
br.ret.sptk.many b0 };;
.endp OPENSSL_instrument_bus#
// OPENSSL_instrument_bus2: r32 is used as the address of the current
// 32-bit vector element, r33 as the number of elements still to fill
// and r34 as the probe budget. Every probe flushes the element (fc),
// loads it, adds the current ar.itc delta and writes the sum back with
// cmpxchg4.acq. The element pointer advances and r33 decrements only
// when the new delta differs from the previous one. r8 on return is
// the initial count minus whatever is left in r33.
// The entry label must carry the same name as the .global/.proc/.endp
// directives, otherwise the exported symbol is left undefined.
.global OPENSSL_instrument_bus2#
.proc OPENSSL_instrument_bus2#
OPENSSL_instrument_bus2:
{ .mmi; mov r2=r33 // put aside cnt
#if defined(_HPUX_SOURCE) && !defined(_LP64)
addp4 r32=0,r32
#endif
}
{ .mmi; mov r8=ar.itc;;
mov r10=r0 // diff=0 for the initial probe
mov r9=r8 };; // lasttick=tick
{ .mmi; fc r32;;
ld4 r8=[r32] };;
{ .mmi; mf
mov ar.ccv=r8
add r8=r8,r10 };;
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
};;
{ .mmi; mov r8=ar.itc;;
sub r10=r8,r9 // first diff
mov r9=r8 };; // lasttick=tick
.Loop2:
{ .mmi; mov r11=r10 // lastdiff=diff
add r34=-1,r34 };; // --max
{ .mmi; fc r32;;
ld4 r8=[r32]
cmp4.eq p6,p0=0,r34 };; // p6 set once max has reached zero
{ .mmi; mf
mov ar.ccv=r8
add r8=r8,r10 };;
{ .mmb; cmpxchg4.acq r3=[r32],r8,ar.ccv
(p6) br.cond.spnt .Ldone2 };;
{ .mmi; mov r8=ar.itc;;
sub r10=r8,r9 // diff=tick-lasttick
mov r9=r8 };; // lasttick=tick
{ .mmi; cmp.ne p6,p0=r10,r11;; // diff!=lastdiff
(p6) add r33=-1,r33 };; // conditional --cnt
{ .mib; cmp4.ne p7,p0=0,r33
(p6) add r32=4,r32 // conditional ++out
(p7) br.cond.dptk .Loop2 };;
.Ldone2:
{ .mib; sub r8=r2,r33
br.ret.sptk.many b0 };;
.endp OPENSSL_instrument_bus2#

View file

@ -87,8 +87,8 @@ OPENSSL_wipe_cpu
.PROCEND
___
{
$inp="%r26";
$len="%r25";
my $inp="%r26";
my $len="%r25";
$code.=<<___;
.EXPORT OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR
@ -112,9 +112,9 @@ Lalign
Laligned
andcm $len,%r1,%r28
Loop
Lot
$ST %r0,0($inp)
addib,*<> -$SIZE_T,%r28,Loop
addib,*<> -$SIZE_T,%r28,Lot
ldo $SIZE_T($inp),$inp
and,*<> $len,%r1,$len
@ -130,7 +130,93 @@ Ldone
.PROCEND
___
}
{
# Register names substituted into the PA-RISC assembler appended to
# $code below. $out is used as the address of the current 32-bit vector
# element, $cnt and $max as counters, the remaining four as scratch for
# %cr16 readings and their differences.
# NOTE(review): $rv, $rp, $ST and $SIZE_T are not declared in this block;
# presumably they are set earlier in the file - confirm.
my ($out,$cnt,$max)=("%r26","%r25","%r24");
my ($tick,$lasttick)=("%r23","%r22");
my ($diff,$lastdiff)=("%r21","%r20");
# OPENSSL_instrument_bus:
#   Copies $cnt to $rv first. One probe is: fdc on the current element,
#   ldw it, add $diff, stw the sum back. The probe before Loop adds a
#   $diff of 0; inside Loop $diff is the %cr16 reading minus the
#   previous one. The instruction written after each branch (addi after
#   addib, sub after bv) appears to rely on PA-RISC branch delay slots:
#   $out advances by 4 per pass and $rv-$cnt is computed on return.
#
# OPENSSL_instrument_bus2:
#   $cnt is negated up front so that it can be counted up towards zero.
#   After each probe $max is decremented and Ldone2 is taken when it
#   reaches zero. The cmpclr/ldi/ldi/xor sequence presumably leaves 1 in
#   $tick when $diff differs from $lastdiff and 0 otherwise; that value
#   is added to $cnt (addb) and, scaled by 4, to $out (shladd).
#   The return value is $rv plus the (non-positive) remainder in $cnt.
$code.=<<___;
.EXPORT OPENSSL_instrument_bus,ENTRY,ARGW0=GR,ARGW1=GR
.ALIGN 8
OPENSSL_instrument_bus
.PROC
.CALLINFO NO_CALLS
.ENTRY
copy $cnt,$rv
mfctl %cr16,$tick
copy $tick,$lasttick
ldi 0,$diff
fdc 0($out)
ldw 0($out),$tick
add $diff,$tick,$tick
stw $tick,0($out)
Loop
mfctl %cr16,$tick
sub $tick,$lasttick,$diff
copy $tick,$lasttick
fdc 0($out)
ldw 0($out),$tick
add $diff,$tick,$tick
stw $tick,0($out)
addib,<> -1,$cnt,Loop
addi 4,$out,$out
bv ($rp)
.EXIT
sub $rv,$cnt,$rv
.PROCEND
.EXPORT OPENSSL_instrument_bus2,ENTRY,ARGW0=GR,ARGW1=GR
.ALIGN 8
OPENSSL_instrument_bus2
.PROC
.CALLINFO NO_CALLS
.ENTRY
copy $cnt,$rv
sub %r0,$cnt,$cnt
mfctl %cr16,$tick
copy $tick,$lasttick
ldi 0,$diff
fdc 0($out)
ldw 0($out),$tick
add $diff,$tick,$tick
stw $tick,0($out)
mfctl %cr16,$tick
sub $tick,$lasttick,$diff
copy $tick,$lasttick
Loop2
copy $diff,$lastdiff
fdc 0($out)
ldw 0($out),$tick
add $diff,$tick,$tick
stw $tick,0($out)
addib,= -1,$max,Ldone2
nop
mfctl %cr16,$tick
sub $tick,$lasttick,$diff
copy $tick,$lasttick
cmpclr,<> $lastdiff,$diff,$tick
ldi 1,$tick
ldi 1,%r1
xor %r1,$tick,$tick
addb,<> $tick,$cnt,Loop2
shladd,l $tick,2,$out,$out
Ldone2
bv ($rp)
.EXIT
add $rv,$cnt,$rv
.PROCEND
___
}
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
$code =~ s/,\*/,/gm if ($SIZE_T==4);
print $code;

View file

@ -69,10 +69,10 @@ $code=<<___;
.globl .OPENSSL_atomic_add
.align 4
.OPENSSL_atomic_add:
Loop: lwarx r5,0,r3
Ladd: lwarx r5,0,r3
add r0,r4,r5
stwcx. r0,0,r3
bne- Loop
bne- Ladd
$SIGNX r3,r0
blr
@ -112,6 +112,89 @@ Laligned:
bne Little
blr
___
{
# Register names substituted into the PowerPC assembler appended to
# $code below. $out is used as the address of the current 32-bit vector
# element, $cnt and $max as counters, the remaining four as scratch for
# time-base (mftb) readings and their differences.
my ($out,$cnt,$max)=("r3","r4","r5");
my ($tick,$lasttick)=("r6","r7");
my ($diff,$lastdiff)=("r8","r9");
# .OPENSSL_instrument_bus:
#   Loads $cnt into CTR and loops with bdnz, so $cnt itself is never
#   modified and is copied to r3 as the return value. One probe is:
#   dcbf the element, lwarx it, add $diff, stwcx. the sum and then store
#   the same sum again with a plain stwx. The probe before Loop adds a
#   $diff of 0.
#
# .OPENSSL_instrument_bus2:
#   Keeps the original $cnt in r0 and scales $cnt by 4 (slwi) so that
#   the same 0-or-4 value can be subtracted from $cnt and added to $out.
#   That value is derived from the eq bit of CR field 7 after cmplw:
#   the CR is copied out, inverted, and rlwinm rotates/masks a single
#   bit into the position worth 4. $max is decremented after each probe
#   (addic.) and Ldone2 is taken when it reaches zero. The return value
#   is r0 minus the remaining $cnt scaled back down by 4.
$code.=<<___;
.globl .OPENSSL_instrument_bus
.align 4
.OPENSSL_instrument_bus:
mtctr $cnt
mftb $lasttick # collect 1st tick
li $diff,0
dcbf 0,$out # flush cache line
lwarx $tick,0,$out # load and lock
add $tick,$tick,$diff
stwcx. $tick,0,$out
stwx $tick,0,$out
Loop: mftb $tick
sub $diff,$tick,$lasttick
mr $lasttick,$tick
dcbf 0,$out # flush cache line
lwarx $tick,0,$out # load and lock
add $tick,$tick,$diff
stwcx. $tick,0,$out
stwx $tick,0,$out
addi $out,$out,4 # ++$out
bdnz Loop
mr r3,$cnt
blr
.globl .OPENSSL_instrument_bus2
.align 4
.OPENSSL_instrument_bus2:
mr r0,$cnt
slwi $cnt,$cnt,2
mftb $lasttick # collect 1st tick
li $diff,0
dcbf 0,$out # flush cache line
lwarx $tick,0,$out # load and lock
add $tick,$tick,$diff
stwcx. $tick,0,$out
stwx $tick,0,$out
mftb $tick # collect 1st diff
sub $diff,$tick,$lasttick
mr $lasttick,$tick
mr $lastdiff,$diff
Loop2:
dcbf 0,$out # flush cache line
lwarx $tick,0,$out # load and lock
add $tick,$tick,$diff
stwcx. $tick,0,$out
stwx $tick,0,$out
addic. $max,$max,-1
beq Ldone2
mftb $tick
sub $diff,$tick,$lasttick
mr $lasttick,$tick
cmplw 7,$diff,$lastdiff
mr $lastdiff,$diff
mfcr $tick # pull cr
not $tick,$tick # flip bits
rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale
sub. $cnt,$cnt,$tick # conditional --$cnt
add $out,$out,$tick # conditional ++$out
bne Loop2
Ldone2:
srwi $cnt,$cnt,2
sub r3,r0,$cnt
blr
___
}
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;

View file

@ -93,6 +93,22 @@ OPENSSL_cleanse:
br %r14
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,@function
.align 16
OPENSSL_instrument_bus:
lghi %r2,0 # stub: no probing is done here, result register is set to 0
br %r14 # return to caller
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,@function
.align 16
OPENSSL_instrument_bus2:
lghi %r2,0 # stub: no probing is done here, result register is set to 0
br %r14 # return to caller
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.section .init
brasl %r14,OPENSSL_cpuid_setup

View file

@ -397,6 +397,102 @@ OPENSSL_cleanse:
.type OPENSSL_cleanse,#function
.size OPENSSL_cleanse,.-OPENSSL_cleanse
! _sparcv9_vis1_instrument_bus
! %o0 is used as the address of the current 32-bit vector element and
! %o1 as the probe count; the initial count is kept in %o3 and handed
! back in %o0 on return.
! One probe: align %o0 down to 64 bytes, block-load and block-store that
! line (with membar #Sync after each), then ld the element, add the
! current %tick delta and write the sum with cas. The probe before .Loop
! adds a delta of 0. Instructions that the assembler may not know are
! emitted as .word with the intended mnemonic in the trailing comment.
.global _sparcv9_vis1_instrument_bus
.align 8
_sparcv9_vis1_instrument_bus:
mov %o1,%o3 ! save cnt
.word 0x99410000 !rd %tick,%o4 ! tick
mov %o4,%o5 ! lasttick = tick
set 0,%g4 ! diff
andn %o0,63,%g1
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
.word 0x8143e040 !membar #Sync
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
.word 0x8143e040 !membar #Sync
ld [%o0],%o4
add %o4,%g4,%g4
.word 0xc9e2100c !cas [%o0],%o4,%g4
.Loop: .word 0x99410000 !rd %tick,%o4
sub %o4,%o5,%g4 ! diff=tick-lasttick
mov %o4,%o5 ! lasttick=tick
andn %o0,63,%g1
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
.word 0x8143e040 !membar #Sync
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
.word 0x8143e040 !membar #Sync
ld [%o0],%o4
add %o4,%g4,%g4
.word 0xc9e2100c !cas [%o0],%o4,%g4
subcc %o1,1,%o1 ! --$cnt
bnz .Loop
add %o0,4,%o0 ! ++$out
retl
mov %o3,%o0
.type _sparcv9_vis1_instrument_bus,#function
.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
! _sparcv9_vis1_instrument_bus2
! %o0 is used as the address of the current 32-bit vector element, %o1
! as the number of elements still to fill (scaled by 4 on entry) and %o2
! as the probe budget; the initial count is kept in %o3.
! Each probe block-loads and block-stores the 64-byte line holding the
! element, then ld/add/cas adds the current %tick delta to it. After a
! probe %o2 is decremented and .Ldone2 is taken when it reaches zero.
! Otherwise the new delta is compared with the previous one; the zero
! flag read from %ccr is inverted to give 4 when they differ and 0 when
! they are equal, and that value is subtracted from %o1 and added to
! %o0. The result is the initial count minus the remaining %o1 scaled
! back down by 4.
.global _sparcv9_vis1_instrument_bus2
.align 8
_sparcv9_vis1_instrument_bus2:
mov %o1,%o3 ! save cnt
sll %o1,2,%o1 ! cnt*=4
.word 0x99410000 !rd %tick,%o4 ! tick
mov %o4,%o5 ! lasttick = tick
set 0,%g4 ! diff
andn %o0,63,%g1
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
.word 0x8143e040 !membar #Sync
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
.word 0x8143e040 !membar #Sync
ld [%o0],%o4
add %o4,%g4,%g4
.word 0xc9e2100c !cas [%o0],%o4,%g4
.word 0x99410000 !rd %tick,%o4 ! tick
sub %o4,%o5,%g4 ! diff=tick-lasttick
mov %o4,%o5 ! lasttick=tick
mov %g4,%g5 ! lastdiff=diff
.Loop2:
andn %o0,63,%g1
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
.word 0x8143e040 !membar #Sync
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
.word 0x8143e040 !membar #Sync
ld [%o0],%o4
add %o4,%g4,%g4
.word 0xc9e2100c !cas [%o0],%o4,%g4
subcc %o2,1,%o2 ! --max
bz .Ldone2
nop
.word 0x99410000 !rd %tick,%o4 ! tick
sub %o4,%o5,%g4 ! diff=tick-lasttick
mov %o4,%o5 ! lasttick=tick
cmp %g4,%g5
mov %g4,%g5 ! lastdiff=diff
.word 0x83408000 !rd %ccr,%g1
and %g1,4,%g1 ! isolate zero flag
xor %g1,4,%g1 ! flip zero flag
subcc %o1,%g1,%o1 ! conditional --$cnt
bnz .Loop2
add %o0,%g1,%o0 ! conditional ++$out
.Ldone2:
srl %o1,2,%o1
retl
sub %o3,%o1,%o0
.type _sparcv9_vis1_instrument_bus2,#function
.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
.section ".init",#alloc,#execinstr
call OPENSSL_cpuid_setup
nop

View file

@ -11,6 +11,7 @@
#define SPARCV9_VIS1 (1<<2)
#define SPARCV9_VIS2 (1<<3) /* reserved */
#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */
#define SPARCV9_BLK (1<<5) /* VIS1 block copy */
static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
@ -31,6 +32,8 @@ void _sparcv9_vis1_probe(void);
unsigned long _sparcv9_vis1_instrument(void);
void _sparcv9_vis2_probe(void);
void _sparcv9_fmadd_probe(void);
/*
 * Assembler bus-probe routines. The declared names have to match the
 * symbols the assembler module exports (_sparcv9_vis1_instrument_bus and
 * _sparcv9_vis1_instrument_bus2), so that callers see a real prototype.
 */
size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t);
size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t);
unsigned long OPENSSL_rdtsc(void)
{
@ -44,6 +47,24 @@ unsigned long OPENSSL_rdtsc(void)
return _sparcv9_rdtick();
}
/*
 * Fill out[0..cnt-1] with bus-probe timings using the VIS1 assembler
 * routine. The probe is only attempted when the capability word has
 * SPARCV9_BLK set and SPARCV9_TICK_PRIVILEGED clear; otherwise 0 is
 * returned to signal that the benchmark cannot be performed.
 */
size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt)
{
	/*
	 * '==' binds tighter than '&', so the masked value has to be
	 * parenthesized; without it the condition is constant zero.
	 */
	if ((OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
			SPARCV9_BLK)
		return _sparcv9_vis1_instrument_bus(out,cnt);
	else
		return 0;
}
/*
 * Variant of the bus probe that takes an additional probe budget, max,
 * and forwards all three arguments to the VIS1 assembler routine. The
 * probe is only attempted when the capability word has SPARCV9_BLK set
 * and SPARCV9_TICK_PRIVILEGED clear; otherwise 0 is returned to signal
 * that the benchmark cannot be performed.
 */
size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max)
{
	/*
	 * '==' binds tighter than '&', so the masked value has to be
	 * parenthesized; without it the condition is constant zero.
	 */
	if ((OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
			SPARCV9_BLK)
		return _sparcv9_vis1_instrument_bus2(out,cnt,max);
	else
		return 0;
}
#if 0 && defined(__sun) && defined(__SVR4)
/* This code path is disabled, because of incompatibility of
* libdevinfo.so.1 and libmalloc.so.1 (see below for details)
@ -112,7 +133,7 @@ void OPENSSL_cpuid_setup(void)
if (sysinfo(SI_ISALIST,si,sizeof(si))>0)
{
if (strstr(si,"+vis"))
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK;
if (strstr(si,"+vis2"))
{
OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
@ -169,7 +190,6 @@ void OPENSSL_cpuid_setup(void)
char *e;
struct sigaction common_act,ill_oact,bus_oact;
sigset_t all_masked,oset;
int sig;
static int trigger=0;
if (trigger) return;
@ -211,7 +231,7 @@ void OPENSSL_cpuid_setup(void)
if (sigsetjmp(common_jmp,1) == 0)
{
_sparcv9_vis1_probe();
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK;
/* detect UltraSPARC-Tx, see sparccpud.S for details... */
if (_sparcv9_vis1_instrument() >= 12)
OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);

View file

@ -9,8 +9,9 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
else { $arg1="%rdi"; $arg2="%rsi"; }
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
("%rdi","%rsi","%rdx","%rcx"); # Unix order
print<<___;
.extern OPENSSL_cpuid_setup
.section .init
@ -228,5 +229,95 @@ OPENSSL_wipe_cpu:
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
{
# Registers used by the two routines emitted below. The incoming
# arguments ($arg1..$arg3) are copied into these first, before rdtsc
# overwrites %eax/%edx.
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Offset from %rsp of the scratch slot in which OPENSSL_instrument_bus2
# parks the original count: 8(%rsp) for Win64, -8(%rsp) otherwise.
# This has to test the $win64 variable; a bareword here is a constant
# true string and would select the Win64 offset on every platform.
my $redzone=$win64?8:-8;

# OPENSSL_instrument_bus: performs $cnt probes. Each probe is rdtsc,
# clflush of the current element and a locked add of the tick delta to
# it, after which $out advances by 4. The count saved in $max is
# returned in %rax.
#
# OPENSSL_instrument_bus2: same probe, but $out advances and $cnt
# decrements only when the delta differs from the previous one (setne),
# and the loop also ends once $max probes have been made. The result in
# %rax is the saved original count minus what is left in $cnt.
print<<___;
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,\@abi-omnipotent
.align 16
OPENSSL_instrument_bus:
mov $arg1,$out # tribute to Win64
mov $arg2,$cnt
mov $arg2,$max
rdtsc # collect 1st tick
mov %eax,$lasttick # lasttick = tick
mov \$0,$lastdiff # lastdiff = 0
clflush ($out)
lock
add $lastdiff,($out)
jmp .Loop
.align 16
.Loop: rdtsc
mov %eax,%edx
sub $lasttick,%eax
mov %edx,$lasttick
mov %eax,$lastdiff
clflush ($out)
lock
add %eax,($out)
lea 4($out),$out
sub \$1,$cnt
jnz .Loop
mov $max,%rax
ret
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,\@abi-omnipotent
.align 16
OPENSSL_instrument_bus2:
mov $arg1,$out # tribute to Win64
mov $arg2,$cnt
mov $arg3,$max
mov $cnt,$redzone(%rsp)
rdtsc # collect 1st tick
mov %eax,$lasttick # lasttick = tick
mov \$0,$lastdiff # lastdiff = 0
clflush ($out)
lock
add $lastdiff,($out)
rdtsc # collect 1st diff
mov %eax,%edx
sub $lasttick,%eax # diff
mov %edx,$lasttick # lasttick = tick
mov %eax,$lastdiff # lastdiff = diff
.Loop2:
clflush ($out)
lock
add %eax,($out) # accumulate diff
sub \$1,$max
jz .Ldone2
rdtsc
mov %eax,%edx
sub $lasttick,%eax # diff
mov %edx,$lasttick # lasttick = tick
cmp $lastdiff,%eax
mov %eax,$lastdiff # lastdiff = diff
mov \$0,%edx
setne %dl
sub %rdx,$cnt # conditional --$cnt
lea ($out,%rdx,4),$out # conditional ++$out
jnz .Loop2
.Ldone2:
mov $redzone(%rsp),%rax
sub $cnt,%rax
ret
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
close STDOUT; # flush

View file

@ -307,6 +307,108 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&ret ();
&function_end_B("OPENSSL_cleanse");
{
# Registers used by both routines generated below.
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";
# OPENSSL_instrument_bus: eax is preset to 0 and stays 0 when the
# generator was run without $sse2 or when either capability bit tested
# below is clear (jump to "nogo"). Otherwise wparam(1) probes are made:
# rdtsc, clflush of the current element, locked add of the tick delta,
# then $out advances by 4. wparam(1) is reloaded as the return value.
&function_begin("OPENSSL_instrument_bus");
&mov ("eax",0);
if ($sse2) {
&picmeup("edx","OPENSSL_ia32cap_P");
&bt (&DWP(0,"edx"),4);
&jnc (&label("nogo")); # no TSC
&bt (&DWP(0,"edx"),19);
&jnc (&label("nogo")); # no CLFLUSH
&mov ($out,&wparam(0)); # load arguments
&mov ($cnt,&wparam(1));
# collect 1st tick
&rdtsc ();
&mov ($lasttick,"eax"); # lasttick = tick
&mov ($lastdiff,0); # lastdiff = 0
&clflush(&DWP(0,$out));
&lock ();
&add (&DWP(0,$out),$lastdiff);
&jmp (&label("loop"));
&set_label("loop",16);
&rdtsc ();
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
&sub ("eax",$lasttick); # diff
&mov ($lasttick,"edx"); # lasttick = tick
&mov ($lastdiff,"eax"); # lastdiff = diff
&clflush(&DWP(0,$out));
&lock ();
&add (&DWP(0,$out),"eax"); # accumulate diff
&lea ($out,&DWP(4,$out)); # ++$out
&sub ($cnt,1); # --$cnt
&jnz (&label("loop"));
&mov ("eax",&wparam(1));
&set_label("nogo");
}
&function_end("OPENSSL_instrument_bus");
# OPENSSL_instrument_bus2: same capability checks and probe sequence,
# with a third argument, $max, limiting the number of probes. $out is
# advanced and $cnt decremented only when the new delta differs from
# the previous one (setne yields 0 or 1 in edx). lea is used for the
# pointer update, so the jnz that follows still tests the result of the
# preceding sub on $cnt. The return value is wparam(1) minus what is
# left in $cnt.
&function_begin("OPENSSL_instrument_bus2");
&mov ("eax",0);
if ($sse2) {
&picmeup("edx","OPENSSL_ia32cap_P");
&bt (&DWP(0,"edx"),4);
&jnc (&label("nogo")); # no TSC
&bt (&DWP(0,"edx"),19);
&jnc (&label("nogo")); # no CLFLUSH
&mov ($out,&wparam(0)); # load arguments
&mov ($cnt,&wparam(1));
&mov ($max,&wparam(2));
&rdtsc (); # collect 1st tick
&mov ($lasttick,"eax"); # lasttick = tick
&mov ($lastdiff,0); # lastdiff = 0
&clflush(&DWP(0,$out));
&lock ();
&add (&DWP(0,$out),$lastdiff);
&rdtsc (); # collect 1st diff
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
&sub ("eax",$lasttick); # diff
&mov ($lasttick,"edx"); # lasttick = tick
&mov ($lastdiff,"eax"); # lastdiff = diff
&jmp (&label("loop2"));
&set_label("loop2",16);
&clflush(&DWP(0,$out));
&lock ();
&add (&DWP(0,$out),"eax"); # accumulate diff
&sub ($max,1);
&jz (&label("done2"));
&rdtsc ();
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
&sub ("eax",$lasttick); # diff
&mov ($lasttick,"edx"); # lasttick = tick
&cmp ("eax",$lastdiff);
&mov ($lastdiff,"eax"); # lastdiff = diff
&mov ("edx",0);
&setne ("dl");
&sub ($cnt,"edx"); # conditional --$cnt
&lea ($out,&DWP(0,$out,"edx",4)); # conditional ++$out
&jnz (&label("loop2"));
&set_label("done2");
&mov ("eax",&wparam(1));
&sub ("eax",$cnt);
&set_label("nogo");
}
&function_end("OPENSSL_instrument_bus2");
}
&initseg("OPENSSL_cpuid_setup");
&asm_finish();

View file

@ -0,0 +1,42 @@
=pod
=head1 NAME
OPENSSL_instrument_bus[2] - instrument references to memory bus
=head1 SYNOPSIS
#ifdef OPENSSL_CPUID_OBJ
size_t OPENSSL_instrument_bus (unsigned int *vector,size_t num);
size_t OPENSSL_instrument_bus2(unsigned int *vector,size_t num,size_t max);
#endif
=head1 DESCRIPTION
It was empirically found that timings of references to primary memory
are subject to irregular, apparently non-deterministic variations. The
subroutines in question instrument these references for the purpose of
gathering entropy for a random number generator. In order to make the
references bus-bound, a 'flush cache line' instruction is used between
probes. In addition, probes are added to B<vector> elements in an atomic
or interlocked manner, which should contribute additional noise on
multi-processor systems. This also means that B<vector[num]> should be
zeroed upon invocation (if you want to retrieve actual probe values).
OPENSSL_instrument_bus performs B<num> probes and records the number of
oscillator cycles every probe took.
OPENSSL_instrument_bus2, on the other hand, B<accumulates> consecutive
probes with the same value, i.e. in a way it records the duration of
periods when probe values appeared deterministic. The subroutine
performs at most B<max> probes in an attempt to fill the B<vector[num]>,
with a B<max> value of 0 meaning "as many as it takes."
=head1 RETURN VALUE
A return value of 0 indicates that the CPU is not capable of performing
the benchmark, either because an oscillator counter or a 'flush cache
line' instruction is not available on the current platform. For
reference, on x86 'flush cache line' was introduced with the SSE2 extensions.
Otherwise number of recorded values is returned.