BN update from HEAD.
This commit is contained in:
parent
70b52222f5
commit
9f1c5491d2
7 changed files with 657 additions and 467 deletions
|
@ -169,15 +169,13 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
|
|||
#endif /* OPENSSL_NO_ASM */
|
||||
|
||||
|
||||
/* BN_div[_no_branch] computes dv := num / divisor, rounding towards
|
||||
/* BN_div computes dv := num / divisor, rounding towards
|
||||
* zero, and sets up rm such that dv*divisor + rm = num holds.
|
||||
* Thus:
|
||||
* dv->neg == num->neg ^ divisor->neg (unless the result is zero)
|
||||
* rm->neg == num->neg (unless the remainder is zero)
|
||||
* If 'dv' or 'rm' is NULL, the respective value is not returned.
|
||||
*/
|
||||
static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
|
||||
const BIGNUM *divisor, BN_CTX *ctx);
|
||||
int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
|
@ -186,6 +184,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
|
|||
BN_ULONG *resp,*wnump;
|
||||
BN_ULONG d0,d1;
|
||||
int num_n,div_n;
|
||||
int no_branch=0;
|
||||
|
||||
/* Invalid zero-padding would have particularly bad consequences
|
||||
* in the case of 'num', so don't just rely on bn_check_top() for this one
|
||||
|
@ -200,7 +199,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
|
|||
|
||||
if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0))
|
||||
{
|
||||
return BN_div_no_branch(dv, rm, num, divisor, ctx);
|
||||
no_branch=1;
|
||||
}
|
||||
|
||||
bn_check_top(dv);
|
||||
|
@ -214,7 +213,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
|
|||
return(0);
|
||||
}
|
||||
|
||||
if (BN_ucmp(num,divisor) < 0)
|
||||
if (!no_branch && BN_ucmp(num,divisor) < 0)
|
||||
{
|
||||
if (rm != NULL)
|
||||
{ if (BN_copy(rm,num) == NULL) return(0); }
|
||||
|
@ -239,242 +238,25 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
|
|||
norm_shift+=BN_BITS2;
|
||||
if (!(BN_lshift(snum,num,norm_shift))) goto err;
|
||||
snum->neg=0;
|
||||
div_n=sdiv->top;
|
||||
num_n=snum->top;
|
||||
loop=num_n-div_n;
|
||||
/* Let's set up a 'window' into snum
|
||||
* This is the part that corresponds to the current
|
||||
* 'area' being divided */
|
||||
wnum.neg = 0;
|
||||
wnum.d = &(snum->d[loop]);
|
||||
wnum.top = div_n;
|
||||
/* only needed when BN_ucmp messes up the values between top and max */
|
||||
wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
|
||||
|
||||
/* Get the top 2 words of sdiv */
|
||||
/* div_n=sdiv->top; */
|
||||
d0=sdiv->d[div_n-1];
|
||||
d1=(div_n == 1)?0:sdiv->d[div_n-2];
|
||||
|
||||
/* pointer to the 'top' of snum */
|
||||
wnump= &(snum->d[num_n-1]);
|
||||
|
||||
/* Setup to 'res' */
|
||||
res->neg= (num->neg^divisor->neg);
|
||||
if (!bn_wexpand(res,(loop+1))) goto err;
|
||||
res->top=loop;
|
||||
resp= &(res->d[loop-1]);
|
||||
|
||||
/* space for temp */
|
||||
if (!bn_wexpand(tmp,(div_n+1))) goto err;
|
||||
|
||||
if (BN_ucmp(&wnum,sdiv) >= 0)
|
||||
if (no_branch)
|
||||
{
|
||||
/* If BN_DEBUG_RAND is defined BN_ucmp changes (via
|
||||
* bn_pollute) the const bignum arguments =>
|
||||
* clean the values between top and max again */
|
||||
bn_clear_top2max(&wnum);
|
||||
bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
|
||||
*resp=1;
|
||||
}
|
||||
else
|
||||
res->top--;
|
||||
/* if res->top == 0 then clear the neg value otherwise decrease
|
||||
* the resp pointer */
|
||||
if (res->top == 0)
|
||||
res->neg = 0;
|
||||
else
|
||||
resp--;
|
||||
|
||||
for (i=0; i<loop-1; i++, wnump--, resp--)
|
||||
{
|
||||
BN_ULONG q,l0;
|
||||
/* the first part of the loop uses the top two words of
|
||||
* snum and sdiv to calculate a BN_ULONG q such that
|
||||
* | wnum - sdiv * q | < sdiv */
|
||||
#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
|
||||
BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
|
||||
q=bn_div_3_words(wnump,d1,d0);
|
||||
#else
|
||||
BN_ULONG n0,n1,rem=0;
|
||||
|
||||
n0=wnump[0];
|
||||
n1=wnump[-1];
|
||||
if (n0 == d0)
|
||||
q=BN_MASK2;
|
||||
else /* n0 < d0 */
|
||||
{
|
||||
#ifdef BN_LLONG
|
||||
BN_ULLONG t2;
|
||||
|
||||
#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
|
||||
q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
|
||||
#else
|
||||
q=bn_div_words(n0,n1,d0);
|
||||
#ifdef BN_DEBUG_LEVITTE
|
||||
fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
|
||||
X) -> 0x%08X\n",
|
||||
n0, n1, d0, q);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef REMAINDER_IS_ALREADY_CALCULATED
|
||||
/*
|
||||
* rem doesn't have to be BN_ULLONG. The least we
|
||||
* know it's less than d0, isn't it?
|
||||
*/
|
||||
rem=(n1-q*d0)&BN_MASK2;
|
||||
#endif
|
||||
t2=(BN_ULLONG)d1*q;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
|
||||
break;
|
||||
q--;
|
||||
rem += d0;
|
||||
if (rem < d0) break; /* don't let rem overflow */
|
||||
t2 -= d1;
|
||||
}
|
||||
#else /* !BN_LLONG */
|
||||
BN_ULONG t2l,t2h;
|
||||
|
||||
q=bn_div_words(n0,n1,d0);
|
||||
#ifdef BN_DEBUG_LEVITTE
|
||||
fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
|
||||
X) -> 0x%08X\n",
|
||||
n0, n1, d0, q);
|
||||
#endif
|
||||
#ifndef REMAINDER_IS_ALREADY_CALCULATED
|
||||
rem=(n1-q*d0)&BN_MASK2;
|
||||
#endif
|
||||
|
||||
#if defined(BN_UMULT_LOHI)
|
||||
BN_UMULT_LOHI(t2l,t2h,d1,q);
|
||||
#elif defined(BN_UMULT_HIGH)
|
||||
t2l = d1 * q;
|
||||
t2h = BN_UMULT_HIGH(d1,q);
|
||||
#else
|
||||
{
|
||||
BN_ULONG ql, qh;
|
||||
t2l=LBITS(d1); t2h=HBITS(d1);
|
||||
ql =LBITS(q); qh =HBITS(q);
|
||||
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
|
||||
}
|
||||
#endif
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if ((t2h < rem) ||
|
||||
((t2h == rem) && (t2l <= wnump[-2])))
|
||||
break;
|
||||
q--;
|
||||
rem += d0;
|
||||
if (rem < d0) break; /* don't let rem overflow */
|
||||
if (t2l < d1) t2h--; t2l -= d1;
|
||||
}
|
||||
#endif /* !BN_LLONG */
|
||||
}
|
||||
#endif /* !BN_DIV3W */
|
||||
|
||||
l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
|
||||
tmp->d[div_n]=l0;
|
||||
wnum.d--;
|
||||
/* ignore top values of the bignums, just sub the two
|
||||
* BN_ULONG arrays with bn_sub_words */
|
||||
if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
|
||||
{
|
||||
/* Note: As we have considered only the leading
|
||||
* two BN_ULONGs in the calculation of q, sdiv * q
|
||||
* might be greater than wnum (but then (q-1) * sdiv
|
||||
* is less than or equal to wnum)
|
||||
*/
|
||||
q--;
|
||||
if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
|
||||
/* we can't have an overflow here (assuming
|
||||
* that q != 0, but if q == 0 then tmp is
|
||||
* zero anyway) */
|
||||
(*wnump)++;
|
||||
}
|
||||
/* store part of the result */
|
||||
*resp = q;
|
||||
}
|
||||
bn_correct_top(snum);
|
||||
if (rm != NULL)
|
||||
{
|
||||
/* Keep a copy of the neg flag in num because if rm==num
|
||||
* BN_rshift() will overwrite it.
|
||||
/* Since we don't know whether snum is larger than sdiv,
|
||||
* we pad snum with enough zeroes without changing its
|
||||
* value.
|
||||
*/
|
||||
int neg = num->neg;
|
||||
BN_rshift(rm,snum,norm_shift);
|
||||
if (!BN_is_zero(rm))
|
||||
rm->neg = neg;
|
||||
bn_check_top(rm);
|
||||
}
|
||||
BN_CTX_end(ctx);
|
||||
return(1);
|
||||
err:
|
||||
bn_check_top(rm);
|
||||
BN_CTX_end(ctx);
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
/* BN_div_no_branch is a special version of BN_div. It does not contain
|
||||
* branches that may leak sensitive information.
|
||||
*/
|
||||
static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
|
||||
const BIGNUM *divisor, BN_CTX *ctx)
|
||||
{
|
||||
int norm_shift,i,loop;
|
||||
BIGNUM *tmp,wnum,*snum,*sdiv,*res;
|
||||
BN_ULONG *resp,*wnump;
|
||||
BN_ULONG d0,d1;
|
||||
int num_n,div_n;
|
||||
|
||||
bn_check_top(dv);
|
||||
bn_check_top(rm);
|
||||
/* bn_check_top(num); */ /* 'num' has been checked in BN_div() */
|
||||
bn_check_top(divisor);
|
||||
|
||||
if (BN_is_zero(divisor))
|
||||
{
|
||||
BNerr(BN_F_BN_DIV_NO_BRANCH,BN_R_DIV_BY_ZERO);
|
||||
return(0);
|
||||
}
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
tmp=BN_CTX_get(ctx);
|
||||
snum=BN_CTX_get(ctx);
|
||||
sdiv=BN_CTX_get(ctx);
|
||||
if (dv == NULL)
|
||||
res=BN_CTX_get(ctx);
|
||||
else res=dv;
|
||||
if (sdiv == NULL || res == NULL) goto err;
|
||||
|
||||
/* First we normalise the numbers */
|
||||
norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
|
||||
if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
|
||||
sdiv->neg=0;
|
||||
norm_shift+=BN_BITS2;
|
||||
if (!(BN_lshift(snum,num,norm_shift))) goto err;
|
||||
snum->neg=0;
|
||||
|
||||
/* Since we don't know whether snum is larger than sdiv,
|
||||
* we pad snum with enough zeroes without changing its
|
||||
* value.
|
||||
*/
|
||||
if (snum->top <= sdiv->top+1)
|
||||
{
|
||||
if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
|
||||
for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
|
||||
snum->top = sdiv->top + 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
|
||||
snum->d[snum->top] = 0;
|
||||
snum->top ++;
|
||||
if (snum->top <= sdiv->top+1)
|
||||
{
|
||||
if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
|
||||
for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
|
||||
snum->top = sdiv->top + 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
|
||||
snum->d[snum->top] = 0;
|
||||
snum->top ++;
|
||||
}
|
||||
}
|
||||
|
||||
div_n=sdiv->top;
|
||||
|
@ -500,12 +282,27 @@ static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
|
|||
/* Setup to 'res' */
|
||||
res->neg= (num->neg^divisor->neg);
|
||||
if (!bn_wexpand(res,(loop+1))) goto err;
|
||||
res->top=loop-1;
|
||||
res->top=loop-no_branch;
|
||||
resp= &(res->d[loop-1]);
|
||||
|
||||
/* space for temp */
|
||||
if (!bn_wexpand(tmp,(div_n+1))) goto err;
|
||||
|
||||
if (!no_branch)
|
||||
{
|
||||
if (BN_ucmp(&wnum,sdiv) >= 0)
|
||||
{
|
||||
/* If BN_DEBUG_RAND is defined BN_ucmp changes (via
|
||||
* bn_pollute) the const bignum arguments =>
|
||||
* clean the values between top and max again */
|
||||
bn_clear_top2max(&wnum);
|
||||
bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
|
||||
*resp=1;
|
||||
}
|
||||
else
|
||||
res->top--;
|
||||
}
|
||||
|
||||
/* if res->top == 0 then clear the neg value otherwise decrease
|
||||
* the resp pointer */
|
||||
if (res->top == 0)
|
||||
|
@ -638,7 +435,7 @@ X) -> 0x%08X\n",
|
|||
rm->neg = neg;
|
||||
bn_check_top(rm);
|
||||
}
|
||||
bn_correct_top(res);
|
||||
if (no_branch) bn_correct_top(res);
|
||||
BN_CTX_end(ctx);
|
||||
return(1);
|
||||
err:
|
||||
|
@ -646,5 +443,4 @@ err:
|
|||
BN_CTX_end(ctx);
|
||||
return(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -113,6 +113,18 @@
|
|||
#include "cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#ifdef _WIN32
|
||||
# include <malloc.h>
|
||||
# ifndef alloca
|
||||
# define alloca _alloca
|
||||
# endif
|
||||
#elif defined(__GNUC__)
|
||||
# ifndef alloca
|
||||
# define alloca(s) __builtin_alloca((s))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* maximum precomputation table size for *variable* sliding windows */
|
||||
#define TABLE_SIZE 32
|
||||
|
||||
|
@ -522,23 +534,17 @@ err:
|
|||
* as cache lines are concerned. The following functions are used to transfer a BIGNUM
|
||||
* from/to that table. */
|
||||
|
||||
static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
|
||||
static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width)
|
||||
{
|
||||
size_t i, j;
|
||||
|
||||
if (bn_wexpand(b, top) == NULL)
|
||||
return 0;
|
||||
while (b->top < top)
|
||||
{
|
||||
b->d[b->top++] = 0;
|
||||
}
|
||||
|
||||
if (top > b->top)
|
||||
top = b->top; /* this works because 'buf' is explicitly zeroed */
|
||||
for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
|
||||
{
|
||||
buf[j] = ((unsigned char*)b->d)[i];
|
||||
}
|
||||
|
||||
bn_correct_top(b);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -561,7 +567,7 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf
|
|||
|
||||
/* Given a pointer value, compute the next address that is a cache line multiple. */
|
||||
#define MOD_EXP_CTIME_ALIGN(x_) \
|
||||
((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
|
||||
((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
|
||||
|
||||
/* This variant of BN_mod_exp_mont() uses fixed windows and the special
|
||||
* precomputation memory layout to limit data-dependency to a minimum
|
||||
|
@ -572,17 +578,15 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf
|
|||
int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
||||
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
|
||||
{
|
||||
int i,bits,ret=0,idx,window,wvalue;
|
||||
int i,bits,ret=0,window,wvalue;
|
||||
int top;
|
||||
BIGNUM *r;
|
||||
const BIGNUM *aa;
|
||||
BN_MONT_CTX *mont=NULL;
|
||||
|
||||
int numPowers;
|
||||
unsigned char *powerbufFree=NULL;
|
||||
int powerbufLen = 0;
|
||||
unsigned char *powerbuf=NULL;
|
||||
BIGNUM *computeTemp=NULL, *am=NULL;
|
||||
BIGNUM tmp, am;
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(p);
|
||||
|
@ -602,10 +606,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* Initialize BIGNUM context and allocate intermediate result */
|
||||
BN_CTX_start(ctx);
|
||||
r = BN_CTX_get(ctx);
|
||||
if (r == NULL) goto err;
|
||||
|
||||
/* Allocate a montgomery context if it was not supplied by the caller.
|
||||
* If this is not done, things will break in the montgomery part.
|
||||
|
@ -620,40 +621,154 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|||
|
||||
/* Get the window size to use with size of p. */
|
||||
window = BN_window_bits_for_ctime_exponent_size(bits);
|
||||
#if defined(OPENSSL_BN_ASM_MONT5)
|
||||
if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */
|
||||
#endif
|
||||
|
||||
/* Allocate a buffer large enough to hold all of the pre-computed
|
||||
* powers of a.
|
||||
* powers of am, am itself and tmp.
|
||||
*/
|
||||
numPowers = 1 << window;
|
||||
powerbufLen = sizeof(m->d[0])*top*numPowers;
|
||||
powerbufLen = sizeof(m->d[0])*(top*numPowers +
|
||||
((2*top)>numPowers?(2*top):numPowers));
|
||||
#ifdef alloca
|
||||
if (powerbufLen < 3072)
|
||||
powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
|
||||
else
|
||||
#endif
|
||||
if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
|
||||
goto err;
|
||||
|
||||
powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
|
||||
memset(powerbuf, 0, powerbufLen);
|
||||
|
||||
/* Initialize the intermediate result. Do this early to save double conversion,
|
||||
* once each for a^0 and intermediate result.
|
||||
*/
|
||||
if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
|
||||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
|
||||
#ifdef alloca
|
||||
if (powerbufLen < 3072)
|
||||
powerbufFree = NULL;
|
||||
#endif
|
||||
|
||||
/* Initialize computeTemp as a^1 with montgomery precalcs */
|
||||
computeTemp = BN_CTX_get(ctx);
|
||||
am = BN_CTX_get(ctx);
|
||||
if (computeTemp==NULL || am==NULL) goto err;
|
||||
/* lay down tmp and am right after powers table */
|
||||
tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers);
|
||||
am.d = tmp.d + top;
|
||||
tmp.top = am.top = 0;
|
||||
tmp.dmax = am.dmax = top;
|
||||
tmp.neg = am.neg = 0;
|
||||
tmp.flags = am.flags = BN_FLG_STATIC_DATA;
|
||||
|
||||
/* prepare a^0 in Montgomery domain */
|
||||
#if 1
|
||||
if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err;
|
||||
#else
|
||||
tmp.d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */
|
||||
for (i=1;i<top;i++)
|
||||
tmp.d[i] = (~m->d[i])&BN_MASK2;
|
||||
tmp.top = top;
|
||||
#endif
|
||||
|
||||
/* prepare a^1 in Montgomery domain */
|
||||
if (a->neg || BN_ucmp(a,m) >= 0)
|
||||
{
|
||||
if (!BN_mod(am,a,m,ctx))
|
||||
goto err;
|
||||
aa= am;
|
||||
if (!BN_mod(&am,a,m,ctx)) goto err;
|
||||
if (!BN_to_montgomery(&am,&am,mont,ctx)) goto err;
|
||||
}
|
||||
else
|
||||
aa=a;
|
||||
if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
|
||||
if (!BN_copy(computeTemp, am)) goto err;
|
||||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
|
||||
else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err;
|
||||
|
||||
#if defined(OPENSSL_BN_ASM_MONT5)
|
||||
/* This optimization uses ideas from http://eprint.iacr.org/2011/239,
|
||||
* specifically optimization of cache-timing attack countermeasures
|
||||
* and pre-computation optimization. */
|
||||
|
||||
/* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
|
||||
* 512-bit RSA is hardly relevant, we omit it to spare size... */
|
||||
if (window==5)
|
||||
{
|
||||
void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap,
|
||||
const void *table,const BN_ULONG *np,
|
||||
const BN_ULONG *n0,int num,int power);
|
||||
void bn_scatter5(const BN_ULONG *inp,size_t num,
|
||||
void *table,size_t power);
|
||||
void bn_gather5(BN_ULONG *out,size_t num,
|
||||
void *table,size_t power);
|
||||
|
||||
BN_ULONG *np=mont->N.d, *n0=mont->n0;
|
||||
|
||||
/* BN_to_montgomery can contaminate words above .top
|
||||
* [in BN_DEBUG[_DEBUG] build]... */
|
||||
for (i=am.top; i<top; i++) am.d[i]=0;
|
||||
for (i=tmp.top; i<top; i++) tmp.d[i]=0;
|
||||
|
||||
bn_scatter5(tmp.d,top,powerbuf,0);
|
||||
bn_scatter5(am.d,am.top,powerbuf,1);
|
||||
bn_mul_mont(tmp.d,am.d,am.d,np,n0,top);
|
||||
bn_scatter5(tmp.d,top,powerbuf,2);
|
||||
|
||||
#if 0
|
||||
for (i=3; i<32; i++)
|
||||
{
|
||||
/* Calculate a^i = a^(i-1) * a */
|
||||
bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
|
||||
bn_scatter5(tmp.d,top,powerbuf,i);
|
||||
}
|
||||
#else
|
||||
/* same as above, but uses squaring for 1/2 of operations */
|
||||
for (i=4; i<32; i*=2)
|
||||
{
|
||||
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
|
||||
bn_scatter5(tmp.d,top,powerbuf,i);
|
||||
}
|
||||
for (i=3; i<8; i+=2)
|
||||
{
|
||||
int j;
|
||||
bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
|
||||
bn_scatter5(tmp.d,top,powerbuf,i);
|
||||
for (j=2*i; j<32; j*=2)
|
||||
{
|
||||
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
|
||||
bn_scatter5(tmp.d,top,powerbuf,j);
|
||||
}
|
||||
}
|
||||
for (; i<16; i+=2)
|
||||
{
|
||||
bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
|
||||
bn_scatter5(tmp.d,top,powerbuf,i);
|
||||
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
|
||||
bn_scatter5(tmp.d,top,powerbuf,2*i);
|
||||
}
|
||||
for (; i<32; i+=2)
|
||||
{
|
||||
bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
|
||||
bn_scatter5(tmp.d,top,powerbuf,i);
|
||||
}
|
||||
#endif
|
||||
bits--;
|
||||
for (wvalue=0, i=bits%5; i>=0; i--,bits--)
|
||||
wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
|
||||
bn_gather5(tmp.d,top,powerbuf,wvalue);
|
||||
|
||||
/* Scan the exponent one window at a time starting from the most
|
||||
* significant bits.
|
||||
*/
|
||||
while (bits >= 0)
|
||||
{
|
||||
for (wvalue=0, i=0; i<5; i++,bits--)
|
||||
wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
|
||||
|
||||
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
|
||||
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
|
||||
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
|
||||
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
|
||||
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
|
||||
bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
|
||||
}
|
||||
|
||||
tmp.top=top;
|
||||
bn_correct_top(&tmp);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err;
|
||||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) goto err;
|
||||
|
||||
/* If the window size is greater than 1, then calculate
|
||||
* val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
|
||||
|
@ -662,62 +777,54 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|||
*/
|
||||
if (window > 1)
|
||||
{
|
||||
for (i=2; i<numPowers; i++)
|
||||
if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx)) goto err;
|
||||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err;
|
||||
for (i=3; i<numPowers; i++)
|
||||
{
|
||||
/* Calculate a^i = a^(i-1) * a */
|
||||
if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
|
||||
if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx))
|
||||
goto err;
|
||||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
|
||||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Adjust the number of bits up to a multiple of the window size.
|
||||
* If the exponent length is not a multiple of the window size, then
|
||||
* this pads the most significant bits with zeros to normalize the
|
||||
* scanning loop so there are no special cases.
|
||||
*
|
||||
* * NOTE: Making the window size a power of two less than the native
|
||||
* * word size ensures that the padded bits won't go past the last
|
||||
* * word in the internal BIGNUM structure. Going past the end will
|
||||
* * still produce the correct result, but causes a different branch
|
||||
* * to be taken in the BN_is_bit_set function.
|
||||
*/
|
||||
bits = ((bits+window-1)/window)*window;
|
||||
idx=bits-1; /* The top bit of the window */
|
||||
|
||||
/* Scan the exponent one window at a time starting from the most
|
||||
* significant bits.
|
||||
*/
|
||||
while (idx >= 0)
|
||||
bits--;
|
||||
for (wvalue=0, i=bits%window; i>=0; i--,bits--)
|
||||
wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
|
||||
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err;
|
||||
|
||||
/* Scan the exponent one window at a time starting from the most
|
||||
* significant bits.
|
||||
*/
|
||||
while (bits >= 0)
|
||||
{
|
||||
wvalue=0; /* The 'value' of the window */
|
||||
|
||||
/* Scan the window, squaring the result as we go */
|
||||
for (i=0; i<window; i++,idx--)
|
||||
for (i=0; i<window; i++,bits--)
|
||||
{
|
||||
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
|
||||
wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
|
||||
if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx)) goto err;
|
||||
wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
|
||||
}
|
||||
|
||||
/* Fetch the appropriate pre-computed value from the pre-buf */
|
||||
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
|
||||
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err;
|
||||
|
||||
/* Multiply the result into the intermediate result */
|
||||
if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
|
||||
if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert the final result from montgomery to standard format */
|
||||
if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
|
||||
if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err;
|
||||
ret=1;
|
||||
err:
|
||||
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
|
||||
if (powerbuf!=NULL)
|
||||
{
|
||||
OPENSSL_cleanse(powerbuf,powerbufLen);
|
||||
OPENSSL_free(powerbufFree);
|
||||
if (powerbufFree) OPENSSL_free(powerbufFree);
|
||||
}
|
||||
if (am!=NULL) BN_clear(am);
|
||||
if (computeTemp!=NULL) BN_clear(computeTemp);
|
||||
BN_CTX_end(ctx);
|
||||
return(ret);
|
||||
}
|
||||
|
@ -988,4 +1095,3 @@ err:
|
|||
bn_check_top(r);
|
||||
return(ret);
|
||||
}
|
||||
|
||||
|
|
|
@ -124,6 +124,7 @@ static const BN_ULONG SQR_tb[16] =
|
|||
SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_BN_ASM_GF2m)
|
||||
/* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
|
||||
* result is a polynomial r with degree < 2 * BN_BITS - 1
|
||||
* The caller MUST ensure that the variables have the right amount
|
||||
|
@ -218,7 +219,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c
|
|||
r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */
|
||||
r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
|
||||
}
|
||||
|
||||
#else
|
||||
void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
|
||||
#endif
|
||||
|
||||
/* Add polynomials a and b and store result in r; r could be a or b, a and b
|
||||
* could be equal; r is the bitwise XOR of a and b.
|
||||
|
@ -362,21 +365,17 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
|
|||
int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
|
||||
{
|
||||
int ret = 0;
|
||||
const int max = BN_num_bits(p) + 1;
|
||||
int *arr=NULL;
|
||||
int arr[6];
|
||||
bn_check_top(a);
|
||||
bn_check_top(p);
|
||||
if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
|
||||
ret = BN_GF2m_poly2arr(p, arr, max);
|
||||
if (!ret || ret > max)
|
||||
ret = BN_GF2m_poly2arr(p, arr, sizeof(arr)/sizeof(arr[0]));
|
||||
if (!ret || ret > (int)(sizeof(arr)/sizeof(arr[0])))
|
||||
{
|
||||
BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH);
|
||||
goto err;
|
||||
return 0;
|
||||
}
|
||||
ret = BN_GF2m_mod_arr(r, a, arr);
|
||||
bn_check_top(r);
|
||||
err:
|
||||
if (arr) OPENSSL_free(arr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -531,18 +530,18 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
|
|||
|
||||
BN_CTX_start(ctx);
|
||||
|
||||
b = BN_CTX_get(ctx);
|
||||
c = BN_CTX_get(ctx);
|
||||
u = BN_CTX_get(ctx);
|
||||
v = BN_CTX_get(ctx);
|
||||
if (v == NULL) goto err;
|
||||
if ((b = BN_CTX_get(ctx))==NULL) goto err;
|
||||
if ((c = BN_CTX_get(ctx))==NULL) goto err;
|
||||
if ((u = BN_CTX_get(ctx))==NULL) goto err;
|
||||
if ((v = BN_CTX_get(ctx))==NULL) goto err;
|
||||
|
||||
if (!BN_one(b)) goto err;
|
||||
if (!BN_GF2m_mod(u, a, p)) goto err;
|
||||
if (!BN_copy(v, p)) goto err;
|
||||
|
||||
if (BN_is_zero(u)) goto err;
|
||||
|
||||
if (!BN_copy(v, p)) goto err;
|
||||
#if 0
|
||||
if (!BN_one(b)) goto err;
|
||||
|
||||
while (1)
|
||||
{
|
||||
while (!BN_is_odd(u))
|
||||
|
@ -567,13 +566,86 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
|
|||
if (!BN_GF2m_add(u, u, v)) goto err;
|
||||
if (!BN_GF2m_add(b, b, c)) goto err;
|
||||
}
|
||||
#else
|
||||
{
|
||||
int i, ubits = BN_num_bits(u),
|
||||
vbits = BN_num_bits(v), /* v is copy of p */
|
||||
top = p->top;
|
||||
BN_ULONG *udp,*bdp,*vdp,*cdp;
|
||||
|
||||
bn_wexpand(u,top); udp = u->d;
|
||||
for (i=u->top;i<top;i++) udp[i] = 0;
|
||||
u->top = top;
|
||||
bn_wexpand(b,top); bdp = b->d;
|
||||
bdp[0] = 1;
|
||||
for (i=1;i<top;i++) bdp[i] = 0;
|
||||
b->top = top;
|
||||
bn_wexpand(c,top); cdp = c->d;
|
||||
for (i=0;i<top;i++) cdp[i] = 0;
|
||||
c->top = top;
|
||||
vdp = v->d; /* It pays off to "cache" *->d pointers, because
|
||||
* it allows optimizer to be more aggressive.
|
||||
* But we don't have to "cache" p->d, because *p
|
||||
* is declared 'const'... */
|
||||
while (1)
|
||||
{
|
||||
while (ubits && !(udp[0]&1))
|
||||
{
|
||||
BN_ULONG u0,u1,b0,b1,mask;
|
||||
|
||||
u0 = udp[0];
|
||||
b0 = bdp[0];
|
||||
mask = (BN_ULONG)0-(b0&1);
|
||||
b0 ^= p->d[0]&mask;
|
||||
for (i=0;i<top-1;i++)
|
||||
{
|
||||
u1 = udp[i+1];
|
||||
udp[i] = ((u0>>1)|(u1<<(BN_BITS2-1)))&BN_MASK2;
|
||||
u0 = u1;
|
||||
b1 = bdp[i+1]^(p->d[i+1]&mask);
|
||||
bdp[i] = ((b0>>1)|(b1<<(BN_BITS2-1)))&BN_MASK2;
|
||||
b0 = b1;
|
||||
}
|
||||
udp[i] = u0>>1;
|
||||
bdp[i] = b0>>1;
|
||||
ubits--;
|
||||
}
|
||||
|
||||
if (ubits<=BN_BITS2 && udp[0]==1) break;
|
||||
|
||||
if (ubits<vbits)
|
||||
{
|
||||
i = ubits; ubits = vbits; vbits = i;
|
||||
tmp = u; u = v; v = tmp;
|
||||
tmp = b; b = c; c = tmp;
|
||||
udp = vdp; vdp = v->d;
|
||||
bdp = cdp; cdp = c->d;
|
||||
}
|
||||
for(i=0;i<top;i++)
|
||||
{
|
||||
udp[i] ^= vdp[i];
|
||||
bdp[i] ^= cdp[i];
|
||||
}
|
||||
if (ubits==vbits)
|
||||
{
|
||||
bn_correct_top(u);
|
||||
ubits = BN_num_bits(u);
|
||||
}
|
||||
}
|
||||
bn_correct_top(b);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!BN_copy(r, b)) goto err;
|
||||
bn_check_top(r);
|
||||
ret = 1;
|
||||
|
||||
err:
|
||||
#ifdef BN_DEBUG /* BN_CTX_end would complain about the expanded form */
|
||||
bn_correct_top(c);
|
||||
bn_correct_top(u);
|
||||
bn_correct_top(v);
|
||||
#endif
|
||||
BN_CTX_end(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -238,7 +238,7 @@ extern "C" {
|
|||
# if defined(__DECC)
|
||||
# include <c_asm.h>
|
||||
# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
|
||||
# elif defined(__GNUC__)
|
||||
# elif defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret; \
|
||||
asm ("umulh %1,%2,%0" \
|
||||
|
@ -247,7 +247,7 @@ extern "C" {
|
|||
ret; })
|
||||
# endif /* compiler */
|
||||
# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
|
||||
# if defined(__GNUC__)
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret; \
|
||||
asm ("mulhdu %0,%1,%2" \
|
||||
|
@ -257,7 +257,7 @@ extern "C" {
|
|||
# endif /* compiler */
|
||||
# elif (defined(__x86_64) || defined(__x86_64__)) && \
|
||||
(defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
|
||||
# if defined(__GNUC__)
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret,discard; \
|
||||
asm ("mulq %3" \
|
||||
|
@ -280,6 +280,19 @@ extern "C" {
|
|||
# define BN_UMULT_HIGH(a,b) __umulh((a),(b))
|
||||
# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high)))
|
||||
# endif
|
||||
# elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret; \
|
||||
asm ("dmultu %1,%2" \
|
||||
: "=h"(ret) \
|
||||
: "r"(a), "r"(b) : "l"); \
|
||||
ret; })
|
||||
# define BN_UMULT_LOHI(low,high,a,b) \
|
||||
asm ("dmultu %2,%3" \
|
||||
: "=l"(low),"=h"(high) \
|
||||
: "r"(a), "r"(b));
|
||||
# endif
|
||||
# endif /* cpu */
|
||||
#endif /* OPENSSL_NO_ASM */
|
||||
|
||||
|
|
|
@ -177,31 +177,26 @@ err:
|
|||
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
|
||||
{
|
||||
BIGNUM *n;
|
||||
BN_ULONG *ap,*np,*rp,n0,v,*nrp;
|
||||
int al,nl,max,i,x,ri;
|
||||
BN_ULONG *ap,*np,*rp,n0,v,carry;
|
||||
int nl,max,i;
|
||||
|
||||
n= &(mont->N);
|
||||
/* mont->ri is the size of mont->N in bits (rounded up
|
||||
to the word size) */
|
||||
al=ri=mont->ri/BN_BITS2;
|
||||
|
||||
nl=n->top;
|
||||
if ((al == 0) || (nl == 0)) { ret->top=0; return(1); }
|
||||
if (nl == 0) { ret->top=0; return(1); }
|
||||
|
||||
max=(nl+al+1); /* allow for overflow (no?) XXX */
|
||||
max=(2*nl); /* carry is stored separately */
|
||||
if (bn_wexpand(r,max) == NULL) return(0);
|
||||
|
||||
r->neg^=n->neg;
|
||||
np=n->d;
|
||||
rp=r->d;
|
||||
nrp= &(r->d[nl]);
|
||||
|
||||
/* clear the top words of T */
|
||||
#if 1
|
||||
for (i=r->top; i<max; i++) /* memset? XXX */
|
||||
r->d[i]=0;
|
||||
rp[i]=0;
|
||||
#else
|
||||
memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
|
||||
memset(&(rp[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
|
||||
#endif
|
||||
|
||||
r->top=max;
|
||||
|
@ -210,7 +205,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
|
|||
#ifdef BN_COUNT
|
||||
fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
|
||||
#endif
|
||||
for (i=0; i<nl; i++)
|
||||
for (carry=0, i=0; i<nl; i++, rp++)
|
||||
{
|
||||
#ifdef __TANDEM
|
||||
{
|
||||
|
@ -228,61 +223,33 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
|
|||
#else
|
||||
v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
|
||||
#endif
|
||||
nrp++;
|
||||
rp++;
|
||||
if (((nrp[-1]+=v)&BN_MASK2) >= v)
|
||||
continue;
|
||||
else
|
||||
{
|
||||
if (((++nrp[0])&BN_MASK2) != 0) continue;
|
||||
if (((++nrp[1])&BN_MASK2) != 0) continue;
|
||||
for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
|
||||
}
|
||||
v = (v+carry+rp[nl])&BN_MASK2;
|
||||
carry |= (v != rp[nl]);
|
||||
carry &= (v <= rp[nl]);
|
||||
rp[nl]=v;
|
||||
}
|
||||
bn_correct_top(r);
|
||||
|
||||
/* mont->ri will be a multiple of the word size and below code
|
||||
* is kind of BN_rshift(ret,r,mont->ri) equivalent */
|
||||
if (r->top <= ri)
|
||||
{
|
||||
ret->top=0;
|
||||
return(1);
|
||||
}
|
||||
al=r->top-ri;
|
||||
|
||||
#define BRANCH_FREE 1
|
||||
#if BRANCH_FREE
|
||||
if (bn_wexpand(ret,ri) == NULL) return(0);
|
||||
x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
|
||||
ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
|
||||
if (bn_wexpand(ret,nl) == NULL) return(0);
|
||||
ret->top=nl;
|
||||
ret->neg=r->neg;
|
||||
|
||||
rp=ret->d;
|
||||
ap=&(r->d[ri]);
|
||||
ap=&(r->d[nl]);
|
||||
|
||||
#define BRANCH_FREE 1
|
||||
#if BRANCH_FREE
|
||||
{
|
||||
size_t m1,m2;
|
||||
BN_ULONG *nrp;
|
||||
size_t m;
|
||||
|
||||
v=bn_sub_words(rp,ap,np,ri);
|
||||
/* this ----------------^^ works even in al<ri case
|
||||
* thanks to zealous zeroing of top of the vector in the
|
||||
* beginning. */
|
||||
|
||||
/* if ((al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
|
||||
/* in other words if subtraction result is real, then
|
||||
v=bn_sub_words(rp,ap,np,nl)-carry;
|
||||
/* if subtraction result is real, then
|
||||
* trick unconditional memcpy below to perform in-place
|
||||
* "refresh" instead of actual copy. */
|
||||
m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
|
||||
m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
|
||||
m1|=m2; /* (al!=ri) */
|
||||
m1|=(0-(size_t)v); /* (al!=ri || v) */
|
||||
m1&=~m2; /* (al!=ri || v) && !al>ri */
|
||||
nrp=(BN_ULONG *)(((PTR_SIZE_INT)rp&~m1)|((PTR_SIZE_INT)ap&m1));
|
||||
}
|
||||
m=(0-(size_t)v);
|
||||
nrp=(BN_ULONG *)(((PTR_SIZE_INT)rp&~m)|((PTR_SIZE_INT)ap&m));
|
||||
|
||||
/* 'i<ri' is chosen to eliminate dependency on input data, even
|
||||
* though it results in redundant copy in al<ri case. */
|
||||
for (i=0,ri-=4; i<ri; i+=4)
|
||||
for (i=0,nl-=4; i<nl; i+=4)
|
||||
{
|
||||
BN_ULONG t1,t2,t3,t4;
|
||||
|
||||
|
@ -295,40 +262,15 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
|
|||
rp[i+2]=t3;
|
||||
rp[i+3]=t4;
|
||||
}
|
||||
for (ri+=4; i<ri; i++)
|
||||
for (nl+=4; i<nl; i++)
|
||||
rp[i]=nrp[i], ap[i]=0;
|
||||
}
|
||||
#else
|
||||
if (bn_sub_words (rp,ap,np,nl)-carry)
|
||||
memcpy(rp,ap,nl*sizeof(BN_ULONG));
|
||||
#endif
|
||||
bn_correct_top(r);
|
||||
bn_correct_top(ret);
|
||||
#else
|
||||
if (bn_wexpand(ret,al) == NULL) return(0);
|
||||
ret->top=al;
|
||||
ret->neg=r->neg;
|
||||
|
||||
rp=ret->d;
|
||||
ap=&(r->d[ri]);
|
||||
al-=4;
|
||||
for (i=0; i<al; i+=4)
|
||||
{
|
||||
BN_ULONG t1,t2,t3,t4;
|
||||
|
||||
t1=ap[i+0];
|
||||
t2=ap[i+1];
|
||||
t3=ap[i+2];
|
||||
t4=ap[i+3];
|
||||
rp[i+0]=t1;
|
||||
rp[i+1]=t2;
|
||||
rp[i+2]=t3;
|
||||
rp[i+3]=t4;
|
||||
}
|
||||
al+=4;
|
||||
for (; i<al; i++)
|
||||
rp[i]=ap[i];
|
||||
|
||||
if (BN_ucmp(ret, &(mont->N)) >= 0)
|
||||
{
|
||||
if (!BN_usub(ret,ret,&(mont->N))) return(0);
|
||||
}
|
||||
#endif
|
||||
bn_check_top(ret);
|
||||
|
||||
return(1);
|
||||
|
|
|
@ -319,6 +319,13 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
|
|||
:(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
|
||||
#define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
|
||||
#define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
|
||||
# if defined(L_ENDIAN)
|
||||
# if defined(__arch64__)
|
||||
# define NIST_INT64 long
|
||||
# else
|
||||
# define NIST_INT64 long long
|
||||
# endif
|
||||
# endif
|
||||
#else
|
||||
#define bn_cp_64(to, n, from, m) \
|
||||
{ \
|
||||
|
@ -330,13 +337,15 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
|
|||
bn_32_set_0(to, (n)*2); \
|
||||
bn_32_set_0(to, (n)*2+1); \
|
||||
}
|
||||
#if BN_BITS2 == 32
|
||||
#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
|
||||
#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
|
||||
#endif
|
||||
# if defined(_WIN32) && !defined(__GNUC__)
|
||||
# define NIST_INT64 __int64
|
||||
# else
|
||||
# define NIST_INT64 long long
|
||||
# endif
|
||||
#endif /* BN_BITS2 != 64 */
|
||||
|
||||
|
||||
#define nist_set_192(to, from, a1, a2, a3) \
|
||||
{ \
|
||||
bn_cp_64(to, 0, from, (a3) - 3) \
|
||||
|
@ -350,9 +359,11 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
int top = a->top, i;
|
||||
int carry;
|
||||
register BN_ULONG *r_d, *a_d = a->d;
|
||||
BN_ULONG t_d[BN_NIST_192_TOP],
|
||||
buf[BN_NIST_192_TOP],
|
||||
c_d[BN_NIST_192_TOP],
|
||||
union {
|
||||
BN_ULONG bn[BN_NIST_192_TOP];
|
||||
unsigned int ui[BN_NIST_192_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
|
||||
} buf;
|
||||
BN_ULONG c_d[BN_NIST_192_TOP],
|
||||
*res;
|
||||
PTR_SIZE_INT mask;
|
||||
static const BIGNUM _bignum_nist_p_192_sqr = {
|
||||
|
@ -385,15 +396,48 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
else
|
||||
r_d = a_d;
|
||||
|
||||
nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);
|
||||
nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);
|
||||
|
||||
nist_set_192(t_d, buf, 0, 3, 3);
|
||||
#if defined(NIST_INT64)
|
||||
{
|
||||
NIST_INT64 acc; /* accumulator */
|
||||
unsigned int *rp=(unsigned int *)r_d;
|
||||
const unsigned int *bp=(const unsigned int *)buf.ui;
|
||||
|
||||
acc = rp[0]; acc += bp[3*2-6];
|
||||
acc += bp[5*2-6]; rp[0] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[1]; acc += bp[3*2-5];
|
||||
acc += bp[5*2-5]; rp[1] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[2]; acc += bp[3*2-6];
|
||||
acc += bp[4*2-6];
|
||||
acc += bp[5*2-6]; rp[2] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[3]; acc += bp[3*2-5];
|
||||
acc += bp[4*2-5];
|
||||
acc += bp[5*2-5]; rp[3] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[4]; acc += bp[4*2-6];
|
||||
acc += bp[5*2-6]; rp[4] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[5]; acc += bp[4*2-5];
|
||||
acc += bp[5*2-5]; rp[5] = (unsigned int)acc;
|
||||
|
||||
carry = (int)(acc>>32);
|
||||
}
|
||||
#else
|
||||
{
|
||||
BN_ULONG t_d[BN_NIST_192_TOP];
|
||||
|
||||
nist_set_192(t_d, buf.bn, 0, 3, 3);
|
||||
carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
|
||||
nist_set_192(t_d, buf, 4, 4, 0);
|
||||
nist_set_192(t_d, buf.bn, 4, 4, 0);
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
|
||||
nist_set_192(t_d, buf, 5, 5, 5)
|
||||
nist_set_192(t_d, buf.bn, 5, 5, 5)
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
|
||||
|
||||
}
|
||||
#endif
|
||||
if (carry > 0)
|
||||
carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP);
|
||||
else
|
||||
|
@ -435,8 +479,7 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
int top = a->top, i;
|
||||
int carry;
|
||||
BN_ULONG *r_d, *a_d = a->d;
|
||||
BN_ULONG t_d[BN_NIST_224_TOP],
|
||||
buf[BN_NIST_224_TOP],
|
||||
BN_ULONG buf[BN_NIST_224_TOP],
|
||||
c_d[BN_NIST_224_TOP],
|
||||
*res;
|
||||
PTR_SIZE_INT mask;
|
||||
|
@ -474,14 +517,54 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
|
||||
#if BN_BITS2==64
|
||||
/* copy upper 256 bits of 448 bit number ... */
|
||||
nist_cp_bn_0(t_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
|
||||
nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
|
||||
/* ... and right shift by 32 to obtain upper 224 bits */
|
||||
nist_set_224(buf, t_d, 14, 13, 12, 11, 10, 9, 8);
|
||||
nist_set_224(buf, c_d, 14, 13, 12, 11, 10, 9, 8);
|
||||
/* truncate lower part to 224 bits too */
|
||||
r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
|
||||
#else
|
||||
nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
|
||||
#endif
|
||||
|
||||
#if defined(NIST_INT64) && BN_BITS2!=64
|
||||
{
|
||||
NIST_INT64 acc; /* accumulator */
|
||||
unsigned int *rp=(unsigned int *)r_d;
|
||||
const unsigned int *bp=(const unsigned int *)buf;
|
||||
|
||||
acc = rp[0]; acc -= bp[7-7];
|
||||
acc -= bp[11-7]; rp[0] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[1]; acc -= bp[8-7];
|
||||
acc -= bp[12-7]; rp[1] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[2]; acc -= bp[9-7];
|
||||
acc -= bp[13-7]; rp[2] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[3]; acc += bp[7-7];
|
||||
acc += bp[11-7];
|
||||
acc -= bp[10-7]; rp[3] = (unsigned int)acc; acc>>= 32;
|
||||
|
||||
acc += rp[4]; acc += bp[8-7];
|
||||
acc += bp[12-7];
|
||||
acc -= bp[11-7]; rp[4] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[5]; acc += bp[9-7];
|
||||
acc += bp[13-7];
|
||||
acc -= bp[12-7]; rp[5] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[6]; acc += bp[10-7];
|
||||
acc -= bp[13-7]; rp[6] = (unsigned int)acc;
|
||||
|
||||
carry = (int)(acc>>32);
|
||||
# if BN_BITS2==64
|
||||
rp[7] = carry;
|
||||
# endif
|
||||
}
|
||||
#else
|
||||
{
|
||||
BN_ULONG t_d[BN_NIST_224_TOP];
|
||||
|
||||
nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
|
||||
carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
|
||||
nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
|
||||
|
@ -493,6 +576,8 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
|
||||
#if BN_BITS2==64
|
||||
carry = (int)(r_d[BN_NIST_224_TOP-1]>>32);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
u.f = bn_sub_words;
|
||||
if (carry > 0)
|
||||
|
@ -548,9 +633,11 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
int i, top = a->top;
|
||||
int carry = 0;
|
||||
register BN_ULONG *a_d = a->d, *r_d;
|
||||
BN_ULONG t_d[BN_NIST_256_TOP],
|
||||
buf[BN_NIST_256_TOP],
|
||||
c_d[BN_NIST_256_TOP],
|
||||
union {
|
||||
BN_ULONG bn[BN_NIST_256_TOP];
|
||||
unsigned int ui[BN_NIST_256_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
|
||||
} buf;
|
||||
BN_ULONG c_d[BN_NIST_256_TOP],
|
||||
*res;
|
||||
PTR_SIZE_INT mask;
|
||||
union { bn_addsub_f f; PTR_SIZE_INT p; } u;
|
||||
|
@ -584,12 +671,87 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
else
|
||||
r_d = a_d;
|
||||
|
||||
nist_cp_bn_0(buf, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP);
|
||||
nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP);
|
||||
|
||||
#if defined(NIST_INT64)
|
||||
{
|
||||
NIST_INT64 acc; /* accumulator */
|
||||
unsigned int *rp=(unsigned int *)r_d;
|
||||
const unsigned int *bp=(const unsigned int *)buf.ui;
|
||||
|
||||
acc = rp[0]; acc += bp[8-8];
|
||||
acc += bp[9-8];
|
||||
acc -= bp[11-8];
|
||||
acc -= bp[12-8];
|
||||
acc -= bp[13-8];
|
||||
acc -= bp[14-8]; rp[0] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[1]; acc += bp[9-8];
|
||||
acc += bp[10-8];
|
||||
acc -= bp[12-8];
|
||||
acc -= bp[13-8];
|
||||
acc -= bp[14-8];
|
||||
acc -= bp[15-8]; rp[1] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[2]; acc += bp[10-8];
|
||||
acc += bp[11-8];
|
||||
acc -= bp[13-8];
|
||||
acc -= bp[14-8];
|
||||
acc -= bp[15-8]; rp[2] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[3]; acc += bp[11-8];
|
||||
acc += bp[11-8];
|
||||
acc += bp[12-8];
|
||||
acc += bp[12-8];
|
||||
acc += bp[13-8];
|
||||
acc -= bp[15-8];
|
||||
acc -= bp[8-8];
|
||||
acc -= bp[9-8]; rp[3] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[4]; acc += bp[12-8];
|
||||
acc += bp[12-8];
|
||||
acc += bp[13-8];
|
||||
acc += bp[13-8];
|
||||
acc += bp[14-8];
|
||||
acc -= bp[9-8];
|
||||
acc -= bp[10-8]; rp[4] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[5]; acc += bp[13-8];
|
||||
acc += bp[13-8];
|
||||
acc += bp[14-8];
|
||||
acc += bp[14-8];
|
||||
acc += bp[15-8];
|
||||
acc -= bp[10-8];
|
||||
acc -= bp[11-8]; rp[5] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[6]; acc += bp[14-8];
|
||||
acc += bp[14-8];
|
||||
acc += bp[15-8];
|
||||
acc += bp[15-8];
|
||||
acc += bp[14-8];
|
||||
acc += bp[13-8];
|
||||
acc -= bp[8-8];
|
||||
acc -= bp[9-8]; rp[6] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[7]; acc += bp[15-8];
|
||||
acc += bp[15-8];
|
||||
acc += bp[15-8];
|
||||
acc += bp[8 -8];
|
||||
acc -= bp[10-8];
|
||||
acc -= bp[11-8];
|
||||
acc -= bp[12-8];
|
||||
acc -= bp[13-8]; rp[7] = (unsigned int)acc;
|
||||
|
||||
carry = (int)(acc>>32);
|
||||
}
|
||||
#else
|
||||
{
|
||||
BN_ULONG t_d[BN_NIST_256_TOP];
|
||||
|
||||
/*S1*/
|
||||
nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0);
|
||||
nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
|
||||
/*S2*/
|
||||
nist_set_256(c_d, buf, 0, 15, 14, 13, 12, 0, 0, 0);
|
||||
nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
|
||||
carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
|
||||
/* left shift */
|
||||
{
|
||||
|
@ -607,24 +769,26 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
}
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
|
||||
/*S3*/
|
||||
nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8);
|
||||
nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
|
||||
/*S4*/
|
||||
nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9);
|
||||
nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
|
||||
/*D1*/
|
||||
nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11);
|
||||
nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
|
||||
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
|
||||
/*D2*/
|
||||
nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12);
|
||||
nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
|
||||
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
|
||||
/*D3*/
|
||||
nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13);
|
||||
nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
|
||||
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
|
||||
/*D4*/
|
||||
nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14);
|
||||
nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
|
||||
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
|
||||
|
||||
}
|
||||
#endif
|
||||
/* see BN_nist_mod_224 for explanation */
|
||||
u.f = bn_sub_words;
|
||||
if (carry > 0)
|
||||
|
@ -672,9 +836,11 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
int i, top = a->top;
|
||||
int carry = 0;
|
||||
register BN_ULONG *r_d, *a_d = a->d;
|
||||
BN_ULONG t_d[BN_NIST_384_TOP],
|
||||
buf[BN_NIST_384_TOP],
|
||||
c_d[BN_NIST_384_TOP],
|
||||
union {
|
||||
BN_ULONG bn[BN_NIST_384_TOP];
|
||||
unsigned int ui[BN_NIST_384_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
|
||||
} buf;
|
||||
BN_ULONG c_d[BN_NIST_384_TOP],
|
||||
*res;
|
||||
PTR_SIZE_INT mask;
|
||||
union { bn_addsub_f f; PTR_SIZE_INT p; } u;
|
||||
|
@ -709,10 +875,100 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
else
|
||||
r_d = a_d;
|
||||
|
||||
nist_cp_bn_0(buf, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP);
|
||||
nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP);
|
||||
|
||||
#if defined(NIST_INT64)
|
||||
{
|
||||
NIST_INT64 acc; /* accumulator */
|
||||
unsigned int *rp=(unsigned int *)r_d;
|
||||
const unsigned int *bp=(const unsigned int *)buf.ui;
|
||||
|
||||
acc = rp[0]; acc += bp[12-12];
|
||||
acc += bp[21-12];
|
||||
acc += bp[20-12];
|
||||
acc -= bp[23-12]; rp[0] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[1]; acc += bp[13-12];
|
||||
acc += bp[22-12];
|
||||
acc += bp[23-12];
|
||||
acc -= bp[12-12];
|
||||
acc -= bp[20-12]; rp[1] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[2]; acc += bp[14-12];
|
||||
acc += bp[23-12];
|
||||
acc -= bp[13-12];
|
||||
acc -= bp[21-12]; rp[2] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[3]; acc += bp[15-12];
|
||||
acc += bp[12-12];
|
||||
acc += bp[20-12];
|
||||
acc += bp[21-12];
|
||||
acc -= bp[14-12];
|
||||
acc -= bp[22-12];
|
||||
acc -= bp[23-12]; rp[3] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[4]; acc += bp[21-12];
|
||||
acc += bp[21-12];
|
||||
acc += bp[16-12];
|
||||
acc += bp[13-12];
|
||||
acc += bp[12-12];
|
||||
acc += bp[20-12];
|
||||
acc += bp[22-12];
|
||||
acc -= bp[15-12];
|
||||
acc -= bp[23-12];
|
||||
acc -= bp[23-12]; rp[4] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[5]; acc += bp[22-12];
|
||||
acc += bp[22-12];
|
||||
acc += bp[17-12];
|
||||
acc += bp[14-12];
|
||||
acc += bp[13-12];
|
||||
acc += bp[21-12];
|
||||
acc += bp[23-12];
|
||||
acc -= bp[16-12]; rp[5] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[6]; acc += bp[23-12];
|
||||
acc += bp[23-12];
|
||||
acc += bp[18-12];
|
||||
acc += bp[15-12];
|
||||
acc += bp[14-12];
|
||||
acc += bp[22-12];
|
||||
acc -= bp[17-12]; rp[6] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[7]; acc += bp[19-12];
|
||||
acc += bp[16-12];
|
||||
acc += bp[15-12];
|
||||
acc += bp[23-12];
|
||||
acc -= bp[18-12]; rp[7] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[8]; acc += bp[20-12];
|
||||
acc += bp[17-12];
|
||||
acc += bp[16-12];
|
||||
acc -= bp[19-12]; rp[8] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[9]; acc += bp[21-12];
|
||||
acc += bp[18-12];
|
||||
acc += bp[17-12];
|
||||
acc -= bp[20-12]; rp[9] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[10]; acc += bp[22-12];
|
||||
acc += bp[19-12];
|
||||
acc += bp[18-12];
|
||||
acc -= bp[21-12]; rp[10] = (unsigned int)acc; acc >>= 32;
|
||||
|
||||
acc += rp[11]; acc += bp[23-12];
|
||||
acc += bp[20-12];
|
||||
acc += bp[19-12];
|
||||
acc -= bp[22-12]; rp[11] = (unsigned int)acc;
|
||||
|
||||
carry = (int)(acc>>32);
|
||||
}
|
||||
#else
|
||||
{
|
||||
BN_ULONG t_d[BN_NIST_384_TOP];
|
||||
|
||||
/*S1*/
|
||||
nist_set_256(t_d, buf, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4);
|
||||
nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4);
|
||||
/* left shift */
|
||||
{
|
||||
register BN_ULONG *ap,t,c;
|
||||
|
@ -729,29 +985,31 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
|
|||
carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
|
||||
t_d, BN_NIST_256_TOP);
|
||||
/*S2 */
|
||||
carry += (int)bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP);
|
||||
carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
|
||||
/*S3*/
|
||||
nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21);
|
||||
nist_set_384(t_d,buf.bn,20,19,18,17,16,15,14,13,12,23,22,21);
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
|
||||
/*S4*/
|
||||
nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0);
|
||||
nist_set_384(t_d,buf.bn,19,18,17,16,15,14,13,12,20,0,23,0);
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
|
||||
/*S5*/
|
||||
nist_set_384(t_d, buf,0,0,0,0,23,22,21,20,0,0,0,0);
|
||||
nist_set_384(t_d, buf.bn,0,0,0,0,23,22,21,20,0,0,0,0);
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
|
||||
/*S6*/
|
||||
nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20);
|
||||
nist_set_384(t_d,buf.bn,0,0,0,0,0,0,23,22,21,0,0,20);
|
||||
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
|
||||
/*D1*/
|
||||
nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23);
|
||||
nist_set_384(t_d,buf.bn,22,21,20,19,18,17,16,15,14,13,12,23);
|
||||
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
|
||||
/*D2*/
|
||||
nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0);
|
||||
nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,22,21,20,0);
|
||||
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
|
||||
/*D3*/
|
||||
nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0);
|
||||
nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,23,0,0,0);
|
||||
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
|
||||
|
||||
}
|
||||
#endif
|
||||
/* see BN_nist_mod_224 for explanation */
|
||||
u.f = bn_sub_words;
|
||||
if (carry > 0)
|
||||
|
|
|
@ -99,7 +99,7 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a)
|
|||
int BN_rshift1(BIGNUM *r, const BIGNUM *a)
|
||||
{
|
||||
BN_ULONG *ap,*rp,t,c;
|
||||
int i;
|
||||
int i,j;
|
||||
|
||||
bn_check_top(r);
|
||||
bn_check_top(a);
|
||||
|
@ -109,22 +109,25 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a)
|
|||
BN_zero(r);
|
||||
return(1);
|
||||
}
|
||||
i = a->top;
|
||||
ap= a->d;
|
||||
j = i-(ap[i-1]==1);
|
||||
if (a != r)
|
||||
{
|
||||
if (bn_wexpand(r,a->top) == NULL) return(0);
|
||||
r->top=a->top;
|
||||
if (bn_wexpand(r,j) == NULL) return(0);
|
||||
r->neg=a->neg;
|
||||
}
|
||||
ap=a->d;
|
||||
rp=r->d;
|
||||
c=0;
|
||||
for (i=a->top-1; i>=0; i--)
|
||||
t=ap[--i];
|
||||
c=(t&1)?BN_TBIT:0;
|
||||
if (t>>=1) rp[i]=t;
|
||||
while (i>0)
|
||||
{
|
||||
t=ap[i];
|
||||
t=ap[--i];
|
||||
rp[i]=((t>>1)&BN_MASK2)|c;
|
||||
c=(t&1)?BN_TBIT:0;
|
||||
}
|
||||
bn_correct_top(r);
|
||||
r->top=j;
|
||||
bn_check_top(r);
|
||||
return(1);
|
||||
}
|
||||
|
@ -182,10 +185,11 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
|
|||
BN_zero(r);
|
||||
return(1);
|
||||
}
|
||||
i = (BN_num_bits(a)-n+(BN_BITS2-1))/BN_BITS2;
|
||||
if (r != a)
|
||||
{
|
||||
r->neg=a->neg;
|
||||
if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
|
||||
if (bn_wexpand(r,i) == NULL) return(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -196,7 +200,7 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
|
|||
f= &(a->d[nw]);
|
||||
t=r->d;
|
||||
j=a->top-nw;
|
||||
r->top=j;
|
||||
r->top=i;
|
||||
|
||||
if (rb == 0)
|
||||
{
|
||||
|
@ -212,9 +216,8 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
|
|||
l= *(f++);
|
||||
*(t++) =(tmp|(l<<lb))&BN_MASK2;
|
||||
}
|
||||
*(t++) =(l>>rb)&BN_MASK2;
|
||||
if ((l = (l>>rb)&BN_MASK2)) *(t) = l;
|
||||
}
|
||||
bn_correct_top(r);
|
||||
bn_check_top(r);
|
||||
return(1);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue