ppccap.c: engage new multiplication and squaring subroutines.
[And remove FPU multiplication subroutine.] Reviewed-by: Rich Salz <rsalz@openssl.org>
This commit is contained in:
parent
68f6d2a02c
commit
80d27cdb84
3 changed files with 14 additions and 31 deletions
|
@ -278,7 +278,7 @@
|
|||
ppc32_asm => {
|
||||
template => 1,
|
||||
cpuid_asm_src => "ppccpuid.s ppccap.c",
|
||||
bn_asm_src => "bn-ppc.s ppc-mont.s ppc64-mont.s",
|
||||
bn_asm_src => "bn-ppc.s ppc-mont.s",
|
||||
aes_asm_src => "aes_core.c aes_cbc.c aes-ppc.s vpaes-ppc.s aesp8-ppc.s",
|
||||
sha1_asm_src => "sha1-ppc.s sha256-ppc.s sha512-ppc.s sha256p8-ppc.s sha512p8-ppc.s",
|
||||
modes_asm_src => "ghashp8-ppc.s",
|
||||
|
|
|
@ -134,10 +134,7 @@ $code=<<___;
|
|||
.globl .bn_mul_mont_int
|
||||
.align 5
|
||||
.bn_mul_mont_int:
|
||||
cmpwi $num,4
|
||||
mr $rp,r3 ; $rp is reassigned
|
||||
li r3,0
|
||||
bltlr
|
||||
___
|
||||
$code.=<<___ if ($BNSZ==4);
|
||||
cmpwi $num,32 ; longer key performance is not better
|
||||
|
|
|
@ -35,38 +35,24 @@ static sigset_t all_masked;
|
|||
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
const BN_ULONG *np, const BN_ULONG *n0, int num)
|
||||
{
|
||||
int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap,
|
||||
const BN_ULONG *bp, const BN_ULONG *np,
|
||||
const BN_ULONG *n0, int num);
|
||||
int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
const BN_ULONG *np, const BN_ULONG *n0, int num);
|
||||
int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
const BN_ULONG *np, const BN_ULONG *n0, int num);
|
||||
|
||||
if (sizeof(size_t) == 4) {
|
||||
# if 1 || (defined(__APPLE__) && defined(__MACH__))
|
||||
if (num >= 8 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64))
|
||||
return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
|
||||
# else
|
||||
/*
|
||||
* boundary of 32 was experimentally determined on Linux 2.6.22,
|
||||
* might have to be adjusted on AIX...
|
||||
*/
|
||||
if (num >= 32 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) {
|
||||
sigset_t oset;
|
||||
int ret;
|
||||
if (num < 4)
|
||||
return 0;
|
||||
|
||||
sigprocmask(SIG_SETMASK, &all_masked, &oset);
|
||||
ret = bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
|
||||
sigprocmask(SIG_SETMASK, &oset, NULL);
|
||||
if ((num & 3) == 0)
|
||||
return bn_mul4x_mont_int(rp, ap, bp, np, n0, num);
|
||||
|
||||
return ret;
|
||||
}
|
||||
# endif
|
||||
} else if ((OPENSSL_ppccap_P & PPC_FPU64))
|
||||
/*
|
||||
* this is a "must" on POWER6, but run-time detection is not
|
||||
* implemented yet...
|
||||
*/
|
||||
return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
|
||||
/*
|
||||
* There used to be [optional] call to bn_mul_mont_fpu64 here,
|
||||
* but above subroutine is faster on contemporary processors.
|
||||
* Formulation means that there might be old processors where
|
||||
* FPU code path would be faster, POWER6 perhaps, but there was
|
||||
* no opportunity to figure it out...
|
||||
*/
|
||||
|
||||
return bn_mul_mont_int(rp, ap, bp, np, n0, num);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue