Multiple assembler packs: add experimental memory bus instrumentation.
This commit is contained in:
parent
764ef43962
commit
5fabb88a78
10 changed files with 724 additions and 12 deletions
|
@ -126,3 +126,93 @@ OPENSSL_cleanse:
|
|||
.Ldone: ret ($26)
|
||||
.end OPENSSL_cleanse
|
||||
___
|
||||
{
|
||||
my ($out,$cnt,$max)=("\$16","\$17","\$18");
|
||||
my ($tick,$lasttick)=("\$19","\$20");
|
||||
my ($diff,$lastdiff)=("\$21","\$22");
|
||||
my ($v0,$ra,$sp,$zero)=("\$0","\$26","\$30","\$31");
|
||||
|
||||
print <<___;
|
||||
.globl OPENSSL_instrument_bus
|
||||
.ent OPENSSL_instrument_bus
|
||||
OPENSSL_instrument_bus:
|
||||
.frame $sp,0,$ra
|
||||
.prologue 0
|
||||
mov $cnt,$v0
|
||||
|
||||
rpcc $lasttick
|
||||
mov 0,$diff
|
||||
|
||||
ecb ($out)
|
||||
ldl_l $tick,0($out)
|
||||
addl $diff,$tick,$tick
|
||||
mov $tick,$diff
|
||||
stl_c $tick,0($out)
|
||||
stl $diff,0($out)
|
||||
|
||||
.Loop: rpcc $tick
|
||||
subq $tick,$lasttick,$diff
|
||||
mov $tick,$lasttick
|
||||
|
||||
ecb ($out)
|
||||
ldl_l $tick,0($out)
|
||||
addl $diff,$tick,$tick
|
||||
mov $tick,$diff
|
||||
stl_c $tick,0($out)
|
||||
stl $diff,0($out)
|
||||
|
||||
subl $cnt,1,$cnt
|
||||
lda $out,4($out)
|
||||
bne $cnt,.Loop
|
||||
|
||||
ret ($ra)
|
||||
.end OPENSSL_instrument_bus
|
||||
|
||||
.globl OPENSSL_instrument_bus2
|
||||
.ent OPENSSL_instrument_bus2
|
||||
OPENSSL_instrument_bus2:
|
||||
.frame $sp,0,$ra
|
||||
.prologue 0
|
||||
mov $cnt,$v0
|
||||
|
||||
rpcc $lasttick
|
||||
mov 0,$diff
|
||||
|
||||
ecb ($out)
|
||||
ldl_l $tick,0($out)
|
||||
addl $diff,$tick,$tick
|
||||
mov $tick,$diff
|
||||
stl_c $tick,0($out)
|
||||
stl $diff,0($out)
|
||||
|
||||
rpcc $tick
|
||||
subq $tick,$lasttick,$diff
|
||||
mov $tick,$lasttick
|
||||
mov $diff,$lastdiff
|
||||
.Loop2:
|
||||
ecb ($out)
|
||||
ldl_l $tick,0($out)
|
||||
addl $diff,$tick,$tick
|
||||
mov $tick,$diff
|
||||
stl_c $tick,0($out)
|
||||
stl $diff,0($out)
|
||||
|
||||
subl $max,1,$max
|
||||
beq $max,.Ldone2
|
||||
|
||||
rpcc $tick
|
||||
subq $tick,$lasttick,$diff
|
||||
mov $tick,$lasttick
|
||||
subq $lastdiff,$diff,$tick
|
||||
mov $diff,$lastdiff
|
||||
cmovne $tick,1,$tick
|
||||
subl $cnt,$tick,$cnt
|
||||
s4addq $tick,$out,$out
|
||||
bne $cnt,.Loop2
|
||||
|
||||
.Ldone2:
|
||||
subl $v0,$cnt,$v0
|
||||
ret ($ra)
|
||||
.end OPENSSL_instrument_bus2
|
||||
___
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ OPENSSL_atomic_add:
|
|||
{ .mii; mov ar.ccv=r2
|
||||
add r8=r2,r33
|
||||
mov r3=r2 };;
|
||||
{ .mmi; mf
|
||||
{ .mmi; mf;;
|
||||
cmpxchg4.acq r2=[r32],r8,ar.ccv
|
||||
nop.i 0 };;
|
||||
{ .mib; cmp.ne p6,p0=r2,r3
|
||||
|
@ -165,3 +165,89 @@ OPENSSL_cleanse:
|
|||
(p7) br.cond.dpnt .Little
|
||||
(p6) br.ret.sptk.many b0 };;
|
||||
.endp OPENSSL_cleanse#
|
||||
|
||||
.global OPENSSL_instrument_bus#
|
||||
.proc OPENSSL_instrument_bus#
|
||||
// Entry label must carry the same name as the surrounding
// .global/.proc/.endp directives, otherwise the exported symbol
// OPENSSL_instrument_bus is left undefined.
OPENSSL_instrument_bus:
|
||||
{ .mmi; mov r2=r33
|
||||
#if defined(_HPUX_SOURCE) && !defined(_LP64)
|
||||
addp4 r32=0,r32
|
||||
#endif
|
||||
}
|
||||
{ .mmi; mov r8=ar.itc;;
|
||||
mov r10=r0
|
||||
mov r9=r8 };;
|
||||
|
||||
{ .mmi; fc r32;;
|
||||
ld4 r8=[r32] };;
|
||||
{ .mmi; mf
|
||||
mov ar.ccv=r8
|
||||
add r8=r8,r10 };;
|
||||
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
|
||||
};;
|
||||
.Loop:
|
||||
{ .mmi; mov r8=ar.itc;;
|
||||
sub r10=r8,r9 // diff=tick-lasttick
|
||||
mov r9=r8 };; // lasttick=tick
|
||||
{ .mmi; fc r32;;
|
||||
ld4 r8=[r32] };;
|
||||
{ .mmi; mf
|
||||
mov ar.ccv=r8
|
||||
add r8=r8,r10 };;
|
||||
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
|
||||
add r33=-1,r33
|
||||
add r32=4,r32 };;
|
||||
{ .mib; cmp4.ne p6,p0=0,r33
|
||||
(p6) br.cond.dptk .Loop };;
|
||||
|
||||
{ .mib; sub r8=r2,r33
|
||||
br.ret.sptk.many b0 };;
|
||||
.endp OPENSSL_instrument_bus#
|
||||
|
||||
.global OPENSSL_instrument_bus2#
|
||||
.proc OPENSSL_instrument_bus2#
|
||||
// Entry label must carry the same name as the surrounding
// .global/.proc/.endp directives, otherwise the exported symbol
// OPENSSL_instrument_bus2 is left undefined.
OPENSSL_instrument_bus2:
|
||||
{ .mmi; mov r2=r33 // put aside cnt
|
||||
#if defined(_HPUX_SOURCE) && !defined(_LP64)
|
||||
addp4 r32=0,r32
|
||||
#endif
|
||||
}
|
||||
{ .mmi; mov r8=ar.itc;;
|
||||
mov r10=r0
|
||||
mov r9=r8 };;
|
||||
|
||||
{ .mmi; fc r32;;
|
||||
ld4 r8=[r32] };;
|
||||
{ .mmi; mf
|
||||
mov ar.ccv=r8
|
||||
add r8=r8,r10 };;
|
||||
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
|
||||
};;
|
||||
|
||||
{ .mmi; mov r8=ar.itc;;
|
||||
sub r10=r8,r9
|
||||
mov r9=r8 };;
|
||||
.Loop2:
|
||||
{ .mmi; mov r11=r10 // lastdiff=diff
|
||||
add r34=-1,r34 };; // --max
|
||||
{ .mmi; fc r32;;
|
||||
ld4 r8=[r32]
|
||||
cmp4.eq p6,p0=0,r34 };;
|
||||
{ .mmi; mf
|
||||
mov ar.ccv=r8
|
||||
add r8=r8,r10 };;
|
||||
{ .mmb; cmpxchg4.acq r3=[r32],r8,ar.ccv
|
||||
(p6) br.cond.spnt .Ldone2 };;
|
||||
|
||||
{ .mmi; mov r8=ar.itc;;
|
||||
sub r10=r8,r9 // diff=tick-lasttick
|
||||
mov r9=r8 };; // lasttick=tick
|
||||
{ .mmi; cmp.ne p6,p0=r10,r11;; // diff!=lastdiff
|
||||
(p6) add r33=-1,r33 };; // conditional --cnt
|
||||
{ .mib; cmp4.ne p7,p0=0,r33
|
||||
(p6) add r32=4,r32 // conditional ++out
|
||||
(p7) br.cond.dptk .Loop2 };;
|
||||
.Ldone2:
|
||||
{ .mib; sub r8=r2,r33
|
||||
br.ret.sptk.many b0 };;
|
||||
.endp OPENSSL_instrument_bus2#
|
||||
|
|
|
@ -87,8 +87,8 @@ OPENSSL_wipe_cpu
|
|||
.PROCEND
|
||||
___
|
||||
{
|
||||
$inp="%r26";
|
||||
$len="%r25";
|
||||
my $inp="%r26";
|
||||
my $len="%r25";
|
||||
|
||||
$code.=<<___;
|
||||
.EXPORT OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR
|
||||
|
@ -112,9 +112,9 @@ Lalign
|
|||
|
||||
Laligned
|
||||
andcm $len,%r1,%r28
|
||||
Loop
|
||||
Lot
|
||||
$ST %r0,0($inp)
|
||||
addib,*<> -$SIZE_T,%r28,Loop
|
||||
addib,*<> -$SIZE_T,%r28,Lot
|
||||
ldo $SIZE_T($inp),$inp
|
||||
|
||||
and,*<> $len,%r1,$len
|
||||
|
@ -130,7 +130,93 @@ Ldone
|
|||
.PROCEND
|
||||
___
|
||||
}
|
||||
{
|
||||
my ($out,$cnt,$max)=("%r26","%r25","%r24");
|
||||
my ($tick,$lasttick)=("%r23","%r22");
|
||||
my ($diff,$lastdiff)=("%r21","%r20");
|
||||
|
||||
$code.=<<___;
|
||||
.EXPORT OPENSSL_instrument_bus,ENTRY,ARGW0=GR,ARGW1=GR
|
||||
.ALIGN 8
|
||||
OPENSSL_instrument_bus
|
||||
.PROC
|
||||
.CALLINFO NO_CALLS
|
||||
.ENTRY
|
||||
copy $cnt,$rv
|
||||
mfctl %cr16,$tick
|
||||
copy $tick,$lasttick
|
||||
ldi 0,$diff
|
||||
|
||||
fdc 0($out)
|
||||
ldw 0($out),$tick
|
||||
add $diff,$tick,$tick
|
||||
stw $tick,0($out)
|
||||
Loop
|
||||
mfctl %cr16,$tick
|
||||
sub $tick,$lasttick,$diff
|
||||
copy $tick,$lasttick
|
||||
|
||||
fdc 0($out)
|
||||
ldw 0($out),$tick
|
||||
add $diff,$tick,$tick
|
||||
stw $tick,0($out)
|
||||
|
||||
addib,<> -1,$cnt,Loop
|
||||
addi 4,$out,$out
|
||||
|
||||
bv ($rp)
|
||||
.EXIT
|
||||
sub $rv,$cnt,$rv
|
||||
.PROCEND
|
||||
|
||||
.EXPORT OPENSSL_instrument_bus2,ENTRY,ARGW0=GR,ARGW1=GR
|
||||
.ALIGN 8
|
||||
OPENSSL_instrument_bus2
|
||||
.PROC
|
||||
.CALLINFO NO_CALLS
|
||||
.ENTRY
|
||||
copy $cnt,$rv
|
||||
sub %r0,$cnt,$cnt
|
||||
|
||||
mfctl %cr16,$tick
|
||||
copy $tick,$lasttick
|
||||
ldi 0,$diff
|
||||
|
||||
fdc 0($out)
|
||||
ldw 0($out),$tick
|
||||
add $diff,$tick,$tick
|
||||
stw $tick,0($out)
|
||||
|
||||
mfctl %cr16,$tick
|
||||
sub $tick,$lasttick,$diff
|
||||
copy $tick,$lasttick
|
||||
Loop2
|
||||
copy $diff,$lastdiff
|
||||
fdc 0($out)
|
||||
ldw 0($out),$tick
|
||||
add $diff,$tick,$tick
|
||||
stw $tick,0($out)
|
||||
|
||||
addib,= -1,$max,Ldone2
|
||||
nop
|
||||
|
||||
mfctl %cr16,$tick
|
||||
sub $tick,$lasttick,$diff
|
||||
copy $tick,$lasttick
|
||||
cmpclr,<> $lastdiff,$diff,$tick
|
||||
ldi 1,$tick
|
||||
|
||||
ldi 1,%r1
|
||||
xor %r1,$tick,$tick
|
||||
addb,<> $tick,$cnt,Loop2
|
||||
shladd,l $tick,2,$out,$out
|
||||
Ldone2
|
||||
bv ($rp)
|
||||
.EXIT
|
||||
add $rv,$cnt,$rv
|
||||
.PROCEND
|
||||
___
|
||||
}
|
||||
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
|
||||
$code =~ s/,\*/,/gm if ($SIZE_T==4);
|
||||
print $code;
|
||||
|
|
|
@ -69,10 +69,10 @@ $code=<<___;
|
|||
.globl .OPENSSL_atomic_add
|
||||
.align 4
|
||||
.OPENSSL_atomic_add:
|
||||
Loop: lwarx r5,0,r3
|
||||
Ladd: lwarx r5,0,r3
|
||||
add r0,r4,r5
|
||||
stwcx. r0,0,r3
|
||||
bne- Loop
|
||||
bne- Ladd
|
||||
$SIGNX r3,r0
|
||||
blr
|
||||
|
||||
|
@ -112,6 +112,89 @@ Laligned:
|
|||
bne Little
|
||||
blr
|
||||
___
|
||||
{
|
||||
my ($out,$cnt,$max)=("r3","r4","r5");
|
||||
my ($tick,$lasttick)=("r6","r7");
|
||||
my ($diff,$lastdiff)=("r8","r9");
|
||||
|
||||
$code.=<<___;
|
||||
.globl .OPENSSL_instrument_bus
|
||||
.align 4
|
||||
.OPENSSL_instrument_bus:
|
||||
mtctr $cnt
|
||||
|
||||
mftb $lasttick # collect 1st tick
|
||||
li $diff,0
|
||||
|
||||
dcbf 0,$out # flush cache line
|
||||
lwarx $tick,0,$out # load and lock
|
||||
add $tick,$tick,$diff
|
||||
stwcx. $tick,0,$out
|
||||
stwx $tick,0,$out
|
||||
|
||||
Loop: mftb $tick
|
||||
sub $diff,$tick,$lasttick
|
||||
mr $lasttick,$tick
|
||||
dcbf 0,$out # flush cache line
|
||||
lwarx $tick,0,$out # load and lock
|
||||
add $tick,$tick,$diff
|
||||
stwcx. $tick,0,$out
|
||||
stwx $tick,0,$out
|
||||
addi $out,$out,4 # ++$out
|
||||
bdnz Loop
|
||||
|
||||
mr r3,$cnt
|
||||
blr
|
||||
|
||||
.globl .OPENSSL_instrument_bus2
|
||||
.align 4
|
||||
.OPENSSL_instrument_bus2:
|
||||
mr r0,$cnt
|
||||
slwi $cnt,$cnt,2
|
||||
|
||||
mftb $lasttick # collect 1st tick
|
||||
li $diff,0
|
||||
|
||||
dcbf 0,$out # flush cache line
|
||||
lwarx $tick,0,$out # load and lock
|
||||
add $tick,$tick,$diff
|
||||
stwcx. $tick,0,$out
|
||||
stwx $tick,0,$out
|
||||
|
||||
mftb $tick # collect 1st diff
|
||||
sub $diff,$tick,$lasttick
|
||||
mr $lasttick,$tick
|
||||
mr $lastdiff,$diff
|
||||
Loop2:
|
||||
dcbf 0,$out # flush cache line
|
||||
lwarx $tick,0,$out # load and lock
|
||||
add $tick,$tick,$diff
|
||||
stwcx. $tick,0,$out
|
||||
stwx $tick,0,$out
|
||||
|
||||
addic. $max,$max,-1
|
||||
beq Ldone2
|
||||
|
||||
mftb $tick
|
||||
sub $diff,$tick,$lasttick
|
||||
mr $lasttick,$tick
|
||||
cmplw 7,$diff,$lastdiff
|
||||
mr $lastdiff,$diff
|
||||
|
||||
mfcr $tick # pull cr
|
||||
not $tick,$tick # flip bits
|
||||
rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale
|
||||
|
||||
sub. $cnt,$cnt,$tick # conditional --$cnt
|
||||
add $out,$out,$tick # conditional ++$out
|
||||
bne Loop2
|
||||
|
||||
Ldone2:
|
||||
srwi $cnt,$cnt,2
|
||||
sub r3,r0,$cnt
|
||||
blr
|
||||
___
|
||||
}
|
||||
|
||||
$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
||||
print $code;
|
||||
|
|
|
@ -93,6 +93,22 @@ OPENSSL_cleanse:
|
|||
br %r14
|
||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||
|
||||
.globl OPENSSL_instrument_bus
|
||||
.type OPENSSL_instrument_bus,@function
|
||||
.align 16
|
||||
OPENSSL_instrument_bus:
|
||||
lghi %r2,0
|
||||
br %r14
|
||||
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
|
||||
|
||||
.globl OPENSSL_instrument_bus2
|
||||
.type OPENSSL_instrument_bus2,@function
|
||||
.align 16
|
||||
OPENSSL_instrument_bus2:
|
||||
lghi %r2,0
|
||||
br %r14
|
||||
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
|
||||
|
||||
.section .init
|
||||
brasl %r14,OPENSSL_cpuid_setup
|
||||
|
||||
|
|
|
@ -397,6 +397,102 @@ OPENSSL_cleanse:
|
|||
.type OPENSSL_cleanse,#function
|
||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||
|
||||
.global _sparcv9_vis1_instrument_bus
|
||||
.align 8
|
||||
_sparcv9_vis1_instrument_bus:
|
||||
mov %o1,%o3 ! save cnt
|
||||
.word 0x99410000 !rd %tick,%o4 ! tick
|
||||
mov %o4,%o5 ! lasttick = tick
|
||||
set 0,%g4 ! diff
|
||||
|
||||
andn %o0,63,%g1
|
||||
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
|
||||
.word 0x8143e040 !membar #Sync
|
||||
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
|
||||
.word 0x8143e040 !membar #Sync
|
||||
ld [%o0],%o4
|
||||
add %o4,%g4,%g4
|
||||
.word 0xc9e2100c !cas [%o0],%o4,%g4
|
||||
|
||||
.Loop: .word 0x99410000 !rd %tick,%o4
|
||||
sub %o4,%o5,%g4 ! diff=tick-lasttick
|
||||
mov %o4,%o5 ! lasttick=tick
|
||||
|
||||
andn %o0,63,%g1
|
||||
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
|
||||
.word 0x8143e040 !membar #Sync
|
||||
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
|
||||
.word 0x8143e040 !membar #Sync
|
||||
ld [%o0],%o4
|
||||
add %o4,%g4,%g4
|
||||
.word 0xc9e2100c !cas [%o0],%o4,%g4
|
||||
subcc %o1,1,%o1 ! --$cnt
|
||||
bnz .Loop
|
||||
add %o0,4,%o0 ! ++$out
|
||||
|
||||
retl
|
||||
mov %o3,%o0
|
||||
.type _sparcv9_vis1_instrument_bus,#function
|
||||
.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
|
||||
|
||||
.global _sparcv9_vis1_instrument_bus2
|
||||
.align 8
|
||||
_sparcv9_vis1_instrument_bus2:
|
||||
mov %o1,%o3 ! save cnt
|
||||
sll %o1,2,%o1 ! cnt*=4
|
||||
|
||||
.word 0x99410000 !rd %tick,%o4 ! tick
|
||||
mov %o4,%o5 ! lasttick = tick
|
||||
set 0,%g4 ! diff
|
||||
|
||||
andn %o0,63,%g1
|
||||
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
|
||||
.word 0x8143e040 !membar #Sync
|
||||
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
|
||||
.word 0x8143e040 !membar #Sync
|
||||
ld [%o0],%o4
|
||||
add %o4,%g4,%g4
|
||||
.word 0xc9e2100c !cas [%o0],%o4,%g4
|
||||
|
||||
.word 0x99410000 !rd %tick,%o4 ! tick
|
||||
sub %o4,%o5,%g4 ! diff=tick-lasttick
|
||||
mov %o4,%o5 ! lasttick=tick
|
||||
mov %g4,%g5 ! lastdiff=diff
|
||||
.Loop2:
|
||||
andn %o0,63,%g1
|
||||
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
|
||||
.word 0x8143e040 !membar #Sync
|
||||
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
|
||||
.word 0x8143e040 !membar #Sync
|
||||
ld [%o0],%o4
|
||||
add %o4,%g4,%g4
|
||||
.word 0xc9e2100c !cas [%o0],%o4,%g4
|
||||
|
||||
subcc %o2,1,%o2 ! --max
|
||||
bz .Ldone2
|
||||
nop
|
||||
|
||||
.word 0x99410000 !rd %tick,%o4 ! tick
|
||||
sub %o4,%o5,%g4 ! diff=tick-lasttick
|
||||
mov %o4,%o5 ! lasttick=tick
|
||||
cmp %g4,%g5
|
||||
mov %g4,%g5 ! lastdiff=diff
|
||||
|
||||
.word 0x83408000 !rd %ccr,%g1
|
||||
and %g1,4,%g1 ! isolate zero flag
|
||||
xor %g1,4,%g1 ! flip zero flag
|
||||
|
||||
subcc %o1,%g1,%o1 ! conditional --$cnt
|
||||
bnz .Loop2
|
||||
add %o0,%g1,%o0 ! conditional ++$out
|
||||
|
||||
.Ldone2:
|
||||
srl %o1,2,%o1
|
||||
retl
|
||||
sub %o3,%o1,%o0
|
||||
.type _sparcv9_vis1_instrument_bus2,#function
|
||||
.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
|
||||
|
||||
.section ".init",#alloc,#execinstr
|
||||
call OPENSSL_cpuid_setup
|
||||
nop
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#define SPARCV9_VIS1 (1<<2)
|
||||
#define SPARCV9_VIS2 (1<<3) /* reserved */
|
||||
#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */
|
||||
#define SPARCV9_BLK (1<<5) /* VIS1 block copy */
|
||||
|
||||
static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
|
||||
|
||||
|
@ -31,6 +32,8 @@ void _sparcv9_vis1_probe(void);
|
|||
unsigned long _sparcv9_vis1_instrument(void);
|
||||
void _sparcv9_vis2_probe(void);
|
||||
void _sparcv9_fmadd_probe(void);
|
||||
size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t);
|
||||
/* Prototype name must match the assembler symbol and the call made from
 * OPENSSL_instrument_bus2(): _sparcv9_..., not _sparcv8_... */
size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t);
|
||||
|
||||
unsigned long OPENSSL_rdtsc(void)
|
||||
{
|
||||
|
@ -44,6 +47,24 @@ unsigned long OPENSSL_rdtsc(void)
|
|||
return _sparcv9_rdtick();
|
||||
}
|
||||
|
||||
size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt)
|
||||
{
|
||||
/* Mask first, then compare: == binds tighter than &, so without the extra
 * parentheses the test degenerated to "caps & 0" and was always false. */
if ((OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
|
||||
SPARCV9_BLK)
|
||||
return _sparcv9_vis1_instrument_bus(out,cnt);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max)
|
||||
{
|
||||
/* Mask first, then compare: == binds tighter than &, so without the extra
 * parentheses the test degenerated to "caps & 0" and was always false. */
if ((OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
|
||||
SPARCV9_BLK)
|
||||
return _sparcv9_vis1_instrument_bus2(out,cnt,max);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0 && defined(__sun) && defined(__SVR4)
|
||||
/* This code path is disabled, because of incompatibility of
|
||||
* libdevinfo.so.1 and libmalloc.so.1 (see below for details)
|
||||
|
@ -112,7 +133,7 @@ void OPENSSL_cpuid_setup(void)
|
|||
if (sysinfo(SI_ISALIST,si,sizeof(si))>0)
|
||||
{
|
||||
if (strstr(si,"+vis"))
|
||||
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
|
||||
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK;
|
||||
if (strstr(si,"+vis2"))
|
||||
{
|
||||
OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
|
||||
|
@ -169,7 +190,6 @@ void OPENSSL_cpuid_setup(void)
|
|||
char *e;
|
||||
struct sigaction common_act,ill_oact,bus_oact;
|
||||
sigset_t all_masked,oset;
|
||||
int sig;
|
||||
static int trigger=0;
|
||||
|
||||
if (trigger) return;
|
||||
|
@ -211,7 +231,7 @@ void OPENSSL_cpuid_setup(void)
|
|||
if (sigsetjmp(common_jmp,1) == 0)
|
||||
{
|
||||
_sparcv9_vis1_probe();
|
||||
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
|
||||
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK;
|
||||
/* detect UltraSPARC-Tx, see sparccpud.S for details... */
|
||||
if (_sparcv9_vis1_instrument() >= 12)
|
||||
OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);
|
||||
|
|
|
@ -9,8 +9,9 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
|
|||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
|
||||
|
||||
if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
|
||||
else { $arg1="%rdi"; $arg2="%rsi"; }
|
||||
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
|
||||
("%rdi","%rsi","%rdx","%rcx"); # Unix order
|
||||
|
||||
print<<___;
|
||||
.extern OPENSSL_cpuid_setup
|
||||
.section .init
|
||||
|
@ -228,5 +229,95 @@ OPENSSL_wipe_cpu:
|
|||
ret
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
___
|
||||
{
|
||||
my $out="%r10";
|
||||
my $cnt="%rcx";
|
||||
my $max="%r11";
|
||||
my $lasttick="%r8d";
|
||||
my $lastdiff="%r9d";
|
||||
# Offset from %rsp of the scratch slot that OPENSSL_instrument_bus2 uses to
# keep the original $cnt: +8 when $win64 is set, -8 otherwise.
# NOTE(review): presumably +8 is the caller-provided home space on Win64 and
# -8 the red zone elsewhere -- confirm against the respective ABIs.
# The condition must be the $win64 variable; a bareword `win64` is a
# non-empty string and therefore always true.
my $redzone=$win64?8:-8;
|
||||
|
||||
print<<___;
|
||||
.globl OPENSSL_instrument_bus
|
||||
.type OPENSSL_instrument_bus,\@abi-omnipotent
|
||||
.align 16
|
||||
OPENSSL_instrument_bus:
|
||||
mov $arg1,$out # tribute to Win64
|
||||
mov $arg2,$cnt
|
||||
mov $arg2,$max
|
||||
|
||||
rdtsc # collect 1st tick
|
||||
mov %eax,$lasttick # lasttick = tick
|
||||
mov \$0,$lastdiff # lastdiff = 0
|
||||
clflush ($out)
|
||||
lock
|
||||
add $lastdiff,($out)
|
||||
jmp .Loop
|
||||
.align 16
|
||||
.Loop: rdtsc
|
||||
mov %eax,%edx
|
||||
sub $lasttick,%eax
|
||||
mov %edx,$lasttick
|
||||
mov %eax,$lastdiff
|
||||
clflush ($out)
|
||||
lock
|
||||
add %eax,($out)
|
||||
lea 4($out),$out
|
||||
sub \$1,$cnt
|
||||
jnz .Loop
|
||||
|
||||
mov $max,%rax
|
||||
ret
|
||||
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
|
||||
|
||||
.globl OPENSSL_instrument_bus2
|
||||
.type OPENSSL_instrument_bus2,\@abi-omnipotent
|
||||
.align 16
|
||||
OPENSSL_instrument_bus2:
|
||||
mov $arg1,$out # tribute to Win64
|
||||
mov $arg2,$cnt
|
||||
mov $arg3,$max
|
||||
mov $cnt,$redzone(%rsp)
|
||||
|
||||
rdtsc # collect 1st tick
|
||||
mov %eax,$lasttick # lasttick = tick
|
||||
mov \$0,$lastdiff # lastdiff = 0
|
||||
|
||||
clflush ($out)
|
||||
lock
|
||||
add $lastdiff,($out)
|
||||
|
||||
rdtsc # collect 1st diff
|
||||
mov %eax,%edx
|
||||
sub $lasttick,%eax # diff
|
||||
mov %edx,$lasttick # lasttick = tick
|
||||
mov %eax,$lastdiff # lastdiff = diff
|
||||
.Loop2:
|
||||
clflush ($out)
|
||||
lock
|
||||
add %eax,($out) # accumulate diff
|
||||
|
||||
sub \$1,$max
|
||||
jz .Ldone2
|
||||
|
||||
rdtsc
|
||||
mov %eax,%edx
|
||||
sub $lasttick,%eax # diff
|
||||
mov %edx,$lasttick # lasttick = tick
|
||||
cmp $lastdiff,%eax
|
||||
mov %eax,$lastdiff # lastdiff = diff
|
||||
mov \$0,%edx
|
||||
setne %dl
|
||||
sub %rdx,$cnt # conditional --$cnt
|
||||
lea ($out,%rdx,4),$out # conditional ++$out
|
||||
jnz .Loop2
|
||||
|
||||
.Ldone2:
|
||||
mov $redzone(%rsp),%rax
|
||||
sub $cnt,%rax
|
||||
ret
|
||||
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
|
||||
___
|
||||
}
|
||||
|
||||
close STDOUT; # flush
|
||||
|
|
|
@ -307,6 +307,108 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||
&ret ();
|
||||
&function_end_B("OPENSSL_cleanse");
|
||||
|
||||
{
|
||||
my $lasttick = "esi";
|
||||
my $lastdiff = "ebx";
|
||||
my $out = "edi";
|
||||
my $cnt = "ecx";
|
||||
my $max = "ebp";
|
||||
|
||||
&function_begin("OPENSSL_instrument_bus");
|
||||
&mov ("eax",0);
|
||||
if ($sse2) {
|
||||
&picmeup("edx","OPENSSL_ia32cap_P");
|
||||
&bt (&DWP(0,"edx"),4);
|
||||
&jnc (&label("nogo")); # no TSC
|
||||
&bt (&DWP(0,"edx"),19);
|
||||
&jnc (&label("nogo")); # no CLFLUSH
|
||||
|
||||
&mov ($out,&wparam(0)); # load arguments
|
||||
&mov ($cnt,&wparam(1));
|
||||
|
||||
# collect 1st tick
|
||||
&rdtsc ();
|
||||
&mov ($lasttick,"eax"); # lasttick = tick
|
||||
&mov ($lastdiff,0); # lastdiff = 0
|
||||
&clflush(&DWP(0,$out));
|
||||
&lock ();
|
||||
&add (&DWP(0,$out),$lastdiff);
|
||||
&jmp (&label("loop"));
|
||||
|
||||
&set_label("loop",16);
|
||||
&rdtsc ();
|
||||
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
|
||||
&sub ("eax",$lasttick); # diff
|
||||
&mov ($lasttick,"edx"); # lasttick = tick
|
||||
&mov ($lastdiff,"eax"); # lastdiff = diff
|
||||
&clflush(&DWP(0,$out));
|
||||
&lock ();
|
||||
&add (&DWP(0,$out),"eax"); # accumulate diff
|
||||
&lea ($out,&DWP(4,$out)); # ++$out
|
||||
&sub ($cnt,1); # --$cnt
|
||||
&jnz (&label("loop"));
|
||||
|
||||
&mov ("eax",&wparam(1));
|
||||
&set_label("nogo");
|
||||
}
|
||||
&function_end("OPENSSL_instrument_bus");
|
||||
|
||||
&function_begin("OPENSSL_instrument_bus2");
|
||||
&mov ("eax",0);
|
||||
if ($sse2) {
|
||||
&picmeup("edx","OPENSSL_ia32cap_P");
|
||||
&bt (&DWP(0,"edx"),4);
|
||||
&jnc (&label("nogo")); # no TSC
|
||||
&bt (&DWP(0,"edx"),19);
|
||||
&jnc (&label("nogo")); # no CLFLUSH
|
||||
|
||||
&mov ($out,&wparam(0)); # load arguments
|
||||
&mov ($cnt,&wparam(1));
|
||||
&mov ($max,&wparam(2));
|
||||
|
||||
&rdtsc (); # collect 1st tick
|
||||
&mov ($lasttick,"eax"); # lasttick = tick
|
||||
&mov ($lastdiff,0); # lastdiff = 0
|
||||
|
||||
&clflush(&DWP(0,$out));
|
||||
&lock ();
|
||||
&add (&DWP(0,$out),$lastdiff);
|
||||
|
||||
&rdtsc (); # collect 1st diff
|
||||
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
|
||||
&sub ("eax",$lasttick); # diff
|
||||
&mov ($lasttick,"edx"); # lasttick = tick
|
||||
&mov ($lastdiff,"eax"); # lastdiff = diff
|
||||
&jmp (&label("loop2"));
|
||||
|
||||
&set_label("loop2",16);
|
||||
&clflush(&DWP(0,$out));
|
||||
&lock ();
|
||||
&add (&DWP(0,$out),"eax"); # accumulate diff
|
||||
|
||||
&sub ($max,1);
|
||||
&jz (&label("done2"));
|
||||
|
||||
&rdtsc ();
|
||||
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
|
||||
&sub ("eax",$lasttick); # diff
|
||||
&mov ($lasttick,"edx"); # lasttick = tick
|
||||
&cmp ("eax",$lastdiff);
|
||||
&mov ($lastdiff,"eax"); # lastdiff = diff
|
||||
&mov ("edx",0);
|
||||
&setne ("dl");
|
||||
&sub ($cnt,"edx"); # conditional --$cnt
|
||||
&lea ($out,&DWP(0,$out,"edx",4)); # conditional ++$out
|
||||
&jnz (&label("loop2"));
|
||||
|
||||
&set_label("done2");
|
||||
&mov ("eax",&wparam(1));
|
||||
&sub ("eax",$cnt);
|
||||
&set_label("nogo");
|
||||
}
|
||||
&function_end("OPENSSL_instrument_bus2");
|
||||
}
|
||||
|
||||
&initseg("OPENSSL_cpuid_setup");
|
||||
|
||||
&asm_finish();
|
||||
|
|
42
doc/crypto/OPENSSL_instrument_bus.pod
Normal file
42
doc/crypto/OPENSSL_instrument_bus.pod
Normal file
|
@ -0,0 +1,42 @@
|
|||
=pod
|
||||
|
||||
=head1 NAME
|
||||
|
||||
OPENSSL_instrument_bus, OPENSSL_instrument_bus2 - instrument references to memory bus
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
#ifdef OPENSSL_CPUID_OBJ
|
||||
size_t OPENSSL_instrument_bus (unsigned int *vector,size_t num);
|
||||
size_t OPENSSL_instrument_bus2(unsigned int *vector,size_t num,size_t max);
|
||||
#endif
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
It was empirically found that timings of references to primary memory
|
||||
are subject to irregular, apparently non-deterministic variations. The
|
||||
subroutines in question instrument these references for purposes of
|
||||
gathering entropy for a random number generator. In order to make it
|
||||
bus-bound, a 'flush cache line' instruction is used between probes. In
|
||||
addition, probes are added to B<vector> elements in an atomic or
|
||||
interlocked manner, which should contribute additional noise on
|
||||
multi-processor systems. This also means that B<vector[num]> should be
|
||||
zeroed upon invocation (if you want to retrieve actual probe values).
|
||||
|
||||
OPENSSL_instrument_bus performs B<num> probes and records the number of
|
||||
oscillator cycles each probe took.
|
||||
|
||||
OPENSSL_instrument_bus2 on the other hand B<accumulates> consecutive
|
||||
probes with the same value, i.e. in a way it records duration of
|
||||
periods when probe values appeared deterministic. The subroutine
|
||||
performs at most B<max> probes in an attempt to fill the B<vector[num]>,
|
||||
with B<max> value of 0 meaning "as many as it takes."
|
||||
|
||||
=head1 RETURN VALUE
|
||||
|
||||
A return value of 0 indicates that the CPU is not capable of performing the
|
||||
benchmark, either because the oscillator counter or 'flush cache line' is
|
||||
not available on the current platform. For reference, on x86 'flush cache
|
||||
line' was introduced with the SSE2 extensions.
|
||||
|
||||
Otherwise the number of recorded values is returned.
|
Loading…
Reference in a new issue