bn/asm/sparcv9-mont.pl: fix squaring code path.
This module is used only with odd input lengths, i.e. not used in normal PKI cases, on contemporary processors. The problem was "illuminated" by fuzzing tests. Reviewed-by: Richard Levitte <levitte@openssl.org>
This commit is contained in:
parent
3ba4dac67a
commit
120a9e1a82
1 changed files with 16 additions and 13 deletions
|
@ -300,7 +300,7 @@ ___
|
|||
######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
|
||||
######## code without following dedicated squaring procedure.
|
||||
########
|
||||
$sbit="%i2"; # re-use $bp!
|
||||
$sbit="%o5";
|
||||
|
||||
$code.=<<___;
|
||||
.align 32
|
||||
|
@ -413,7 +413,7 @@ $code.=<<___;
|
|||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $acc0,$car0,$car0
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$sbit,$sbit
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
and $car0,$mask,$acc0
|
||||
ld [$np+$j],$npj ! np[j]
|
||||
|
@ -422,7 +422,7 @@ $code.=<<___;
|
|||
ld [$tp+8],$tpj ! tp[j]
|
||||
add $acc0,$acc0,$acc0
|
||||
add $j,4,$j ! j++
|
||||
or $sbit,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
cmp $j,$num
|
||||
|
@ -436,12 +436,12 @@ $code.=<<___;
|
|||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $acc0,$car0,$car0
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$sbit,$sbit
|
||||
and $car0,$mask,$acc0
|
||||
srlx $car0,32,$car0
|
||||
add $acc1,$car1,$car1
|
||||
add $acc0,$acc0,$acc0
|
||||
or $sbit,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
add $acc0,$car1,$car1
|
||||
|
@ -449,7 +449,7 @@ $code.=<<___;
|
|||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0
|
||||
or $sbit,$car0,$car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
|
@ -509,7 +509,7 @@ $code.=<<___;
|
|||
.Lsqr_inner2:
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$sbit,$sbit
|
||||
add $acc0,$car0,$car0
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
and $car0,$mask,$acc0
|
||||
|
@ -517,7 +517,7 @@ $code.=<<___;
|
|||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
ld [$tp+8],$tpj ! tp[j]
|
||||
or $sbit,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
add $j,4,$j ! j++
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
|
@ -532,12 +532,12 @@ $code.=<<___;
|
|||
.Lsqr_no_inner2:
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$sbit,$sbit
|
||||
add $acc0,$car0,$car0
|
||||
and $car0,$mask,$acc0
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
or $sbit,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
add $acc0,$car1,$car1
|
||||
|
@ -546,7 +546,7 @@ $code.=<<___;
|
|||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0
|
||||
or $sbit,$car0,$car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
|
@ -591,14 +591,17 @@ $code.=<<___;
|
|||
!.Lsqr_last
|
||||
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$acc0,$acc0
|
||||
srlx $acc0,32,$tmp0
|
||||
and $acc0,$mask,$acc0
|
||||
add $tmp0,$sbit,$sbit
|
||||
add $acc0,$car1,$car1
|
||||
add $acc1,$car1,$car1
|
||||
st $car1,[$tp]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0 ! recover $car0
|
||||
or $sbit,$car0,$car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
|
|
Loading…
Reference in a new issue