ec/curve25519.c: reorganize for better accessibility.
Move the base 2^64 code to its own #if section. It was nested inside the base 2^51 section, which arguably made it tricky to follow. Reviewed-by: Rich Salz <rsalz@openssl.org> (Merged from https://github.com/openssl/openssl/pull/6699)
This commit is contained in:
parent
d3e3263072
commit
3c849bc901
1 changed files with 150 additions and 143 deletions
|
@ -11,149 +11,23 @@
|
|||
#include "ec_lcl.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#if defined(X25519_ASM) \
|
||||
|| ( (defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16) \
|
||||
&& !defined(__sparc__) \
|
||||
&& !(defined(__ANDROID__) && !defined(__clang__)) )
|
||||
/*
|
||||
* Base 2^51 implementation.
|
||||
*/
|
||||
# define BASE_2_51_IMPLEMENTED
|
||||
#if defined(X25519_ASM) && (defined(__x86_64) || defined(__x86_64__) || \
|
||||
defined(_M_AMD64) || defined(_M_X64))
|
||||
|
||||
typedef uint64_t fe51[5];
|
||||
# if !defined(X25519_ASM)
|
||||
typedef __uint128_t u128;
|
||||
# endif
|
||||
|
||||
static const uint64_t MASK51 = 0x7ffffffffffff;
|
||||
|
||||
/*
 * Assemble 7 consecutive bytes into a 64-bit integer, little-endian.
 *
 * in[0] becomes the least significant byte and in[6] lands in bits 48..55,
 * so the returned value never exceeds 56 bits.  Exactly in[0]..in[6] are
 * read.
 */
static uint64_t load_7(const uint8_t *in)
{
    uint64_t result;

    result = in[0];
    /* Widen each byte before shifting so no bits are lost in int arithmetic. */
    result |= ((uint64_t)in[1]) << 8;
    result |= ((uint64_t)in[2]) << 16;
    result |= ((uint64_t)in[3]) << 24;
    result |= ((uint64_t)in[4]) << 32;
    result |= ((uint64_t)in[5]) << 40;
    result |= ((uint64_t)in[6]) << 48;

    return result;
}
|
||||
|
||||
/*
 * Assemble 6 consecutive bytes into a 64-bit integer, little-endian.
 *
 * in[0] becomes the least significant byte and in[5] lands in bits 40..47,
 * so the returned value never exceeds 48 bits.  Exactly in[0]..in[5] are
 * read.
 */
static uint64_t load_6(const uint8_t *in)
{
    uint64_t result;

    result = in[0];
    /* Widen each byte before shifting so no bits are lost in int arithmetic. */
    result |= ((uint64_t)in[1]) << 8;
    result |= ((uint64_t)in[2]) << 16;
    result |= ((uint64_t)in[3]) << 24;
    result |= ((uint64_t)in[4]) << 32;
    result |= ((uint64_t)in[5]) << 40;

    return result;
}
|
||||
|
||||
/*
 * Unpack a 32-byte little-endian string |s| into five 51-bit limbs |h|.
 *
 * Bytes s[0]..s[31] are read.  The most significant bit of s[31] (bit 255
 * of the input) is masked off and therefore ignored.  On return every limb
 * is below 2^51 and limb i carries input bits 51*i .. 51*i+50.
 */
static void fe51_frombytes(fe51 h, const uint8_t *s)
{
    /*
     * Each load is pre-shifted so that its lowest loaded bit sits at the
     * position it occupies inside its limb; the bits below that position
     * are filled in from the previous limb's overflow further down.
     */
    uint64_t h0 = load_7(s);                                /* 56 bits */
    uint64_t h1 = load_6(s + 7) << 5;                       /* 53 bits */
    uint64_t h2 = load_7(s + 13) << 2;                      /* 58 bits */
    uint64_t h3 = load_6(s + 20) << 7;                      /* 55 bits */
    /* The 47-bit mask drops bit 255 of the input. */
    uint64_t h4 = (load_6(s + 26) & 0x7fffffffffff) << 4;   /* 51 bits */

    /* Move everything above bit 50 of a limb into the next limb up. */
    h1 |= h0 >> 51; h0 &= MASK51;
    h2 |= h1 >> 51; h1 &= MASK51;
    h3 |= h2 >> 51; h2 &= MASK51;
    h4 |= h3 >> 51; h3 &= MASK51;

    h[0] = h0;
    h[1] = h1;
    h[2] = h2;
    h[3] = h3;
    h[4] = h4;
}
|
||||
|
||||
static void fe51_tobytes(uint8_t *s, const fe51 h)
|
||||
{
|
||||
uint64_t h0 = h[0];
|
||||
uint64_t h1 = h[1];
|
||||
uint64_t h2 = h[2];
|
||||
uint64_t h3 = h[3];
|
||||
uint64_t h4 = h[4];
|
||||
uint64_t q;
|
||||
|
||||
/* compare to modulus */
|
||||
q = (h0 + 19) >> 51;
|
||||
q = (h1 + q) >> 51;
|
||||
q = (h2 + q) >> 51;
|
||||
q = (h3 + q) >> 51;
|
||||
q = (h4 + q) >> 51;
|
||||
|
||||
/* full reduce */
|
||||
h0 += 19 * q;
|
||||
h1 += h0 >> 51; h0 &= MASK51;
|
||||
h2 += h1 >> 51; h1 &= MASK51;
|
||||
h3 += h2 >> 51; h2 &= MASK51;
|
||||
h4 += h3 >> 51; h3 &= MASK51;
|
||||
h4 &= MASK51;
|
||||
|
||||
/* smash */
|
||||
s[0] = (uint8_t)(h0 >> 0);
|
||||
s[1] = (uint8_t)(h0 >> 8);
|
||||
s[2] = (uint8_t)(h0 >> 16);
|
||||
s[3] = (uint8_t)(h0 >> 24);
|
||||
s[4] = (uint8_t)(h0 >> 32);
|
||||
s[5] = (uint8_t)(h0 >> 40);
|
||||
s[6] = (uint8_t)((h0 >> 48) | ((uint32_t)h1 << 3));
|
||||
s[7] = (uint8_t)(h1 >> 5);
|
||||
s[8] = (uint8_t)(h1 >> 13);
|
||||
s[9] = (uint8_t)(h1 >> 21);
|
||||
s[10] = (uint8_t)(h1 >> 29);
|
||||
s[11] = (uint8_t)(h1 >> 37);
|
||||
s[12] = (uint8_t)((h1 >> 45) | ((uint32_t)h2 << 6));
|
||||
s[13] = (uint8_t)(h2 >> 2);
|
||||
s[14] = (uint8_t)(h2 >> 10);
|
||||
s[15] = (uint8_t)(h2 >> 18);
|
||||
s[16] = (uint8_t)(h2 >> 26);
|
||||
s[17] = (uint8_t)(h2 >> 34);
|
||||
s[18] = (uint8_t)(h2 >> 42);
|
||||
s[19] = (uint8_t)((h2 >> 50) | ((uint32_t)h3 << 1));
|
||||
s[20] = (uint8_t)(h3 >> 7);
|
||||
s[21] = (uint8_t)(h3 >> 15);
|
||||
s[22] = (uint8_t)(h3 >> 23);
|
||||
s[23] = (uint8_t)(h3 >> 31);
|
||||
s[24] = (uint8_t)(h3 >> 39);
|
||||
s[25] = (uint8_t)((h3 >> 47) | ((uint32_t)h4 << 4));
|
||||
s[26] = (uint8_t)(h4 >> 4);
|
||||
s[27] = (uint8_t)(h4 >> 12);
|
||||
s[28] = (uint8_t)(h4 >> 20);
|
||||
s[29] = (uint8_t)(h4 >> 28);
|
||||
s[30] = (uint8_t)(h4 >> 36);
|
||||
s[31] = (uint8_t)(h4 >> 44);
|
||||
}
|
||||
|
||||
# ifdef X25519_ASM
|
||||
void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
|
||||
void x25519_fe51_sqr(fe51 h, const fe51 f);
|
||||
void x25519_fe51_mul121666(fe51 h, fe51 f);
|
||||
# define fe51_mul x25519_fe51_mul
|
||||
# define fe51_sq x25519_fe51_sqr
|
||||
# define fe51_mul121666 x25519_fe51_mul121666
|
||||
|
||||
# if defined(__x86_64) || defined(__x86_64__) || \
|
||||
defined(_M_AMD64) || defined(_M_X64)
|
||||
|
||||
# define BASE_2_64_IMPLEMENTED
|
||||
# define BASE_2_64_IMPLEMENTED
|
||||
|
||||
typedef uint64_t fe64[4];
|
||||
|
||||
int x25519_fe64_eligible(void);
|
||||
|
||||
/*
|
||||
* There are no reference C implementations for this radix.
|
||||
* Following subroutines perform corresponding operations modulo
|
||||
* 2^256-38, i.e. double the curve modulus. However, inputs and
|
||||
* outputs are permitted to be partially reduced, i.e. to remain
|
||||
* in [0..2^256) range. It's all tied up in final fe64_tobytes
|
||||
* that performs full reduction modulo 2^255-19.
|
||||
*
|
||||
* There are no reference C implementations for these.
|
||||
*/
|
||||
void x25519_fe64_mul(fe64 h, const fe64 f, const fe64 g);
|
||||
void x25519_fe64_sqr(fe64 h, const fe64 f);
|
||||
|
@ -161,12 +35,12 @@ void x25519_fe64_mul121666(fe64 h, fe64 f);
|
|||
void x25519_fe64_add(fe64 h, const fe64 f, const fe64 g);
|
||||
void x25519_fe64_sub(fe64 h, const fe64 f, const fe64 g);
|
||||
void x25519_fe64_tobytes(uint8_t *s, const fe64 f);
|
||||
# define fe64_mul x25519_fe64_mul
|
||||
# define fe64_sqr x25519_fe64_sqr
|
||||
# define fe64_mul121666 x25519_fe64_mul121666
|
||||
# define fe64_add x25519_fe64_add
|
||||
# define fe64_sub x25519_fe64_sub
|
||||
# define fe64_tobytes x25519_fe64_tobytes
|
||||
# define fe64_mul x25519_fe64_mul
|
||||
# define fe64_sqr x25519_fe64_sqr
|
||||
# define fe64_mul121666 x25519_fe64_mul121666
|
||||
# define fe64_add x25519_fe64_add
|
||||
# define fe64_sub x25519_fe64_sub
|
||||
# define fe64_tobytes x25519_fe64_tobytes
|
||||
|
||||
static uint64_t load_8(const uint8_t *in)
|
||||
{
|
||||
|
@ -375,10 +249,143 @@ static void x25519_scalar_mulx(uint8_t out[32], const uint8_t scalar[32],
|
|||
|
||||
OPENSSL_cleanse(e, sizeof(e));
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(X25519_ASM) \
|
||||
|| ( (defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16) \
|
||||
&& !defined(__sparc__) \
|
||||
&& !(defined(__ANDROID__) && !defined(__clang__)) )
|
||||
/*
|
||||
* Base 2^51 implementation. It's virtually no different from reference
|
||||
* base 2^25.5 implementation in respect to lax boundary conditions for
|
||||
* intermediate values and even individual limbs. So that whatever you
|
||||
* know about the reference, applies even here...
|
||||
*/
|
||||
# define BASE_2_51_IMPLEMENTED
|
||||
|
||||
typedef uint64_t fe51[5];
|
||||
|
||||
static const uint64_t MASK51 = 0x7ffffffffffff;
|
||||
|
||||
/*
 * Assemble 7 consecutive bytes into a 64-bit integer, little-endian.
 *
 * in[0] becomes the least significant byte and in[6] lands in bits 48..55,
 * so the returned value never exceeds 56 bits.  Exactly in[0]..in[6] are
 * read.
 */
static uint64_t load_7(const uint8_t *in)
{
    uint64_t result;

    result = in[0];
    /* Widen each byte before shifting so no bits are lost in int arithmetic. */
    result |= ((uint64_t)in[1]) << 8;
    result |= ((uint64_t)in[2]) << 16;
    result |= ((uint64_t)in[3]) << 24;
    result |= ((uint64_t)in[4]) << 32;
    result |= ((uint64_t)in[5]) << 40;
    result |= ((uint64_t)in[6]) << 48;

    return result;
}
|
||||
|
||||
/*
 * Assemble 6 consecutive bytes into a 64-bit integer, little-endian.
 *
 * in[0] becomes the least significant byte and in[5] lands in bits 40..47,
 * so the returned value never exceeds 48 bits.  Exactly in[0]..in[5] are
 * read.
 */
static uint64_t load_6(const uint8_t *in)
{
    uint64_t result;

    result = in[0];
    /* Widen each byte before shifting so no bits are lost in int arithmetic. */
    result |= ((uint64_t)in[1]) << 8;
    result |= ((uint64_t)in[2]) << 16;
    result |= ((uint64_t)in[3]) << 24;
    result |= ((uint64_t)in[4]) << 32;
    result |= ((uint64_t)in[5]) << 40;

    return result;
}
|
||||
|
||||
/*
 * Unpack a 32-byte little-endian string |s| into five 51-bit limbs |h|.
 *
 * Bytes s[0]..s[31] are read.  The most significant bit of s[31] (bit 255
 * of the input) is masked off and therefore ignored.  On return every limb
 * is below 2^51 and limb i carries input bits 51*i .. 51*i+50.
 */
static void fe51_frombytes(fe51 h, const uint8_t *s)
{
    /*
     * Each load is pre-shifted so that its lowest loaded bit sits at the
     * position it occupies inside its limb; the bits below that position
     * are filled in from the previous limb's overflow further down.
     */
    uint64_t h0 = load_7(s);                                /* 56 bits */
    uint64_t h1 = load_6(s + 7) << 5;                       /* 53 bits */
    uint64_t h2 = load_7(s + 13) << 2;                      /* 58 bits */
    uint64_t h3 = load_6(s + 20) << 7;                      /* 55 bits */
    /* The 47-bit mask drops bit 255 of the input. */
    uint64_t h4 = (load_6(s + 26) & 0x7fffffffffff) << 4;   /* 51 bits */

    /* Move everything above bit 50 of a limb into the next limb up. */
    h1 |= h0 >> 51; h0 &= MASK51;
    h2 |= h1 >> 51; h1 &= MASK51;
    h3 |= h2 >> 51; h2 &= MASK51;
    h4 |= h3 >> 51; h3 &= MASK51;

    h[0] = h0;
    h[1] = h1;
    h[2] = h2;
    h[3] = h3;
    h[4] = h4;
}
|
||||
|
||||
static void fe51_tobytes(uint8_t *s, const fe51 h)
|
||||
{
|
||||
uint64_t h0 = h[0];
|
||||
uint64_t h1 = h[1];
|
||||
uint64_t h2 = h[2];
|
||||
uint64_t h3 = h[3];
|
||||
uint64_t h4 = h[4];
|
||||
uint64_t q;
|
||||
|
||||
/* compare to modulus */
|
||||
q = (h0 + 19) >> 51;
|
||||
q = (h1 + q) >> 51;
|
||||
q = (h2 + q) >> 51;
|
||||
q = (h3 + q) >> 51;
|
||||
q = (h4 + q) >> 51;
|
||||
|
||||
/* full reduce */
|
||||
h0 += 19 * q;
|
||||
h1 += h0 >> 51; h0 &= MASK51;
|
||||
h2 += h1 >> 51; h1 &= MASK51;
|
||||
h3 += h2 >> 51; h2 &= MASK51;
|
||||
h4 += h3 >> 51; h3 &= MASK51;
|
||||
h4 &= MASK51;
|
||||
|
||||
/* smash */
|
||||
s[0] = (uint8_t)(h0 >> 0);
|
||||
s[1] = (uint8_t)(h0 >> 8);
|
||||
s[2] = (uint8_t)(h0 >> 16);
|
||||
s[3] = (uint8_t)(h0 >> 24);
|
||||
s[4] = (uint8_t)(h0 >> 32);
|
||||
s[5] = (uint8_t)(h0 >> 40);
|
||||
s[6] = (uint8_t)((h0 >> 48) | ((uint32_t)h1 << 3));
|
||||
s[7] = (uint8_t)(h1 >> 5);
|
||||
s[8] = (uint8_t)(h1 >> 13);
|
||||
s[9] = (uint8_t)(h1 >> 21);
|
||||
s[10] = (uint8_t)(h1 >> 29);
|
||||
s[11] = (uint8_t)(h1 >> 37);
|
||||
s[12] = (uint8_t)((h1 >> 45) | ((uint32_t)h2 << 6));
|
||||
s[13] = (uint8_t)(h2 >> 2);
|
||||
s[14] = (uint8_t)(h2 >> 10);
|
||||
s[15] = (uint8_t)(h2 >> 18);
|
||||
s[16] = (uint8_t)(h2 >> 26);
|
||||
s[17] = (uint8_t)(h2 >> 34);
|
||||
s[18] = (uint8_t)(h2 >> 42);
|
||||
s[19] = (uint8_t)((h2 >> 50) | ((uint32_t)h3 << 1));
|
||||
s[20] = (uint8_t)(h3 >> 7);
|
||||
s[21] = (uint8_t)(h3 >> 15);
|
||||
s[22] = (uint8_t)(h3 >> 23);
|
||||
s[23] = (uint8_t)(h3 >> 31);
|
||||
s[24] = (uint8_t)(h3 >> 39);
|
||||
s[25] = (uint8_t)((h3 >> 47) | ((uint32_t)h4 << 4));
|
||||
s[26] = (uint8_t)(h4 >> 4);
|
||||
s[27] = (uint8_t)(h4 >> 12);
|
||||
s[28] = (uint8_t)(h4 >> 20);
|
||||
s[29] = (uint8_t)(h4 >> 28);
|
||||
s[30] = (uint8_t)(h4 >> 36);
|
||||
s[31] = (uint8_t)(h4 >> 44);
|
||||
}
|
||||
|
||||
# if defined(X25519_ASM)
|
||||
void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
|
||||
void x25519_fe51_sqr(fe51 h, const fe51 f);
|
||||
void x25519_fe51_mul121666(fe51 h, fe51 f);
|
||||
# define fe51_mul x25519_fe51_mul
|
||||
# define fe51_sq x25519_fe51_sqr
|
||||
# define fe51_mul121666 x25519_fe51_mul121666
|
||||
# else
|
||||
|
||||
typedef __uint128_t u128;
|
||||
|
||||
static void fe51_mul(fe51 h, const fe51 f, const fe51 g)
|
||||
{
|
||||
u128 h0, h1, h2, h3, h4;
|
||||
|
|
Loading…
Reference in a new issue