ec/curve25519.c: reorganize for better accessibility.

Move the base 2^64 code into its own #if section. It was previously nested
inside the base 2^51 section, which arguably made it tricky to follow.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6699)
This commit is contained in:
Andy Polyakov 2018-07-12 11:53:16 +02:00
parent d3e3263072
commit 3c849bc901

View file

@ -11,149 +11,23 @@
#include "ec_lcl.h"
#include <openssl/sha.h>
#if defined(X25519_ASM) \
|| ( (defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16) \
&& !defined(__sparc__) \
&& !(defined(__ANDROID__) && !defined(__clang__)) )
/*
* Base 2^51 implementation.
*/
# define BASE_2_51_IMPLEMENTED
#if defined(X25519_ASM) && (defined(__x86_64) || defined(__x86_64__) || \
defined(_M_AMD64) || defined(_M_X64))
typedef uint64_t fe51[5];
# if !defined(X25519_ASM)
typedef __uint128_t u128;
# endif
static const uint64_t MASK51 = 0x7ffffffffffff;
/*
 * Assemble the seven bytes in[0..6] into a 56-bit little-endian integer
 * (in[0] is the least significant byte).
 */
static uint64_t load_7(const uint8_t *in)
{
    uint64_t acc = 0;
    int idx;

    /* Walk from the most significant byte down, shifting as we go. */
    for (idx = 6; idx >= 0; idx--) {
        acc = (acc << 8) | (uint64_t)in[idx];
    }

    return acc;
}
/*
 * Assemble the six bytes in[0..5] into a 48-bit little-endian integer
 * (in[0] is the least significant byte).
 */
static uint64_t load_6(const uint8_t *in)
{
    uint64_t acc = 0;
    int idx;

    /* Walk from the most significant byte down, shifting as we go. */
    for (idx = 5; idx >= 0; idx--) {
        acc = (acc << 8) | (uint64_t)in[idx];
    }

    return acc;
}
/*
 * Unpack the 32 little-endian bytes at s into five limbs of at most 51 bits
 * each, stored in h[0..4] (h[0] least significant). The most significant
 * bit of s[31] is masked off and does not contribute to the result.
 */
static void fe51_frombytes(fe51 h, const uint8_t *s)
{
    uint64_t limb[5];
    int i;

    /*
     * Overlapping-free byte windows, each pre-shifted so that bit 51 of
     * limb[i] lines up with bit 0 of limb[i + 1].
     */
    limb[0] = load_7(s);                                   /* 56 bits */
    limb[1] = load_6(s + 7) << 5;                          /* 53 bits */
    limb[2] = load_7(s + 13) << 2;                         /* 58 bits */
    limb[3] = load_6(s + 20) << 7;                         /* 55 bits */
    limb[4] = (load_6(s + 26) & 0x7fffffffffff) << 4;      /* 51 bits */

    /* Push everything above bit 50 of each limb into the next one. */
    for (i = 0; i < 4; i++) {
        limb[i + 1] |= limb[i] >> 51;
        limb[i] &= MASK51;
    }

    for (i = 0; i < 5; i++) {
        h[i] = limb[i];
    }
}
/*
 * Serialise the field element h (five limbs, h[0] least significant, 51
 * bits of weight per limb) into the 32 little-endian bytes s[0..31] after
 * a final reduction step.
 */
static void fe51_tobytes(uint8_t *s, const fe51 h)
{
    uint64_t limb[5];
    uint64_t word[4];
    uint64_t q;
    int i, j;

    for (i = 0; i < 5; i++) {
        limb[i] = h[i];
    }

    /*
     * Compare to modulus: propagate the carry of (value + 19) through all
     * five limbs; q ends up as the carry out of the top limb.
     */
    q = 19;
    for (i = 0; i < 5; i++) {
        q = (limb[i] + q) >> 51;
    }

    /* Full reduce: fold 19 * q back in and normalise every limb to 51 bits. */
    limb[0] += 19 * q;
    for (i = 0; i < 4; i++) {
        limb[i + 1] += limb[i] >> 51;
        limb[i] &= MASK51;
    }
    limb[4] &= MASK51;

    /*
     * Smash: the limbs are now 51 bits each, so they can be concatenated
     * into four 64-bit words without any overlap...
     */
    word[0] = limb[0] | (limb[1] << 51);
    word[1] = (limb[1] >> 13) | (limb[2] << 38);
    word[2] = (limb[2] >> 26) | (limb[3] << 25);
    word[3] = (limb[3] >> 39) | (limb[4] << 12);

    /* ...and written out least significant byte first. */
    for (i = 0; i < 4; i++) {
        for (j = 0; j < 8; j++) {
            s[8 * i + j] = (uint8_t)(word[i] >> (8 * j));
        }
    }
}
# ifdef X25519_ASM
/*
 * External fe51 primitives: only declared here, with no C body in this
 * section. Judging by their names they are multiplication, squaring and
 * multiplication by the constant 121666 - NOTE(review): presumably supplied
 * by the platform assembly module when X25519_ASM is defined; confirm.
 */
void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
void x25519_fe51_sqr(fe51 h, const fe51 f);
void x25519_fe51_mul121666(fe51 h, fe51 f);
/* Alias the generic fe51_* names to the external routines declared above. */
# define fe51_mul x25519_fe51_mul
# define fe51_sq x25519_fe51_sqr
# define fe51_mul121666 x25519_fe51_mul121666
# if defined(__x86_64) || defined(__x86_64__) || \
defined(_M_AMD64) || defined(_M_X64)
# define BASE_2_64_IMPLEMENTED
# define BASE_2_64_IMPLEMENTED
typedef uint64_t fe64[4];
int x25519_fe64_eligible(void);
/*
* There are no reference C implementations for this radix.
* Following subroutines perform corresponding operations modulo
* 2^256-38, i.e. double the curve modulus. However, inputs and
* outputs are permitted to be partially reduced, i.e. to remain
* in [0..2^256) range. It's all tied up in final fe64_tobytes
* that performs full reduction modulo 2^255-19.
*
* There are no reference C implementations for these.
*/
void x25519_fe64_mul(fe64 h, const fe64 f, const fe64 g);
void x25519_fe64_sqr(fe64 h, const fe64 f);
@ -161,12 +35,12 @@ void x25519_fe64_mul121666(fe64 h, fe64 f);
void x25519_fe64_add(fe64 h, const fe64 f, const fe64 g);
void x25519_fe64_sub(fe64 h, const fe64 f, const fe64 g);
void x25519_fe64_tobytes(uint8_t *s, const fe64 f);
# define fe64_mul x25519_fe64_mul
# define fe64_sqr x25519_fe64_sqr
# define fe64_mul121666 x25519_fe64_mul121666
# define fe64_add x25519_fe64_add
# define fe64_sub x25519_fe64_sub
# define fe64_tobytes x25519_fe64_tobytes
# define fe64_mul x25519_fe64_mul
# define fe64_sqr x25519_fe64_sqr
# define fe64_mul121666 x25519_fe64_mul121666
# define fe64_add x25519_fe64_add
# define fe64_sub x25519_fe64_sub
# define fe64_tobytes x25519_fe64_tobytes
static uint64_t load_8(const uint8_t *in)
{
@ -375,10 +249,143 @@ static void x25519_scalar_mulx(uint8_t out[32], const uint8_t scalar[32],
OPENSSL_cleanse(e, sizeof(e));
}
# endif
#endif
#if defined(X25519_ASM) \
|| ( (defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16) \
&& !defined(__sparc__) \
&& !(defined(__ANDROID__) && !defined(__clang__)) )
/*
* Base 2^51 implementation. It's virtually no different from the reference
* base 2^25.5 implementation with respect to lax boundary conditions for
* intermediate values and even individual limbs. So whatever you know
* about the reference applies here as well...
*/
# define BASE_2_51_IMPLEMENTED
typedef uint64_t fe51[5];
static const uint64_t MASK51 = 0x7ffffffffffff;
/*
 * Assemble the seven bytes in[0..6] into a 56-bit little-endian integer
 * (in[0] is the least significant byte).
 */
static uint64_t load_7(const uint8_t *in)
{
    uint64_t acc = 0;
    int idx;

    /* Walk from the most significant byte down, shifting as we go. */
    for (idx = 6; idx >= 0; idx--) {
        acc = (acc << 8) | (uint64_t)in[idx];
    }

    return acc;
}
/*
 * Assemble the six bytes in[0..5] into a 48-bit little-endian integer
 * (in[0] is the least significant byte).
 */
static uint64_t load_6(const uint8_t *in)
{
    uint64_t acc = 0;
    int idx;

    /* Walk from the most significant byte down, shifting as we go. */
    for (idx = 5; idx >= 0; idx--) {
        acc = (acc << 8) | (uint64_t)in[idx];
    }

    return acc;
}
/*
 * Unpack the 32 little-endian bytes at s into five limbs of at most 51 bits
 * each, stored in h[0..4] (h[0] least significant). The most significant
 * bit of s[31] is masked off and does not contribute to the result.
 */
static void fe51_frombytes(fe51 h, const uint8_t *s)
{
    uint64_t limb[5];
    int i;

    /*
     * Overlapping-free byte windows, each pre-shifted so that bit 51 of
     * limb[i] lines up with bit 0 of limb[i + 1].
     */
    limb[0] = load_7(s);                                   /* 56 bits */
    limb[1] = load_6(s + 7) << 5;                          /* 53 bits */
    limb[2] = load_7(s + 13) << 2;                         /* 58 bits */
    limb[3] = load_6(s + 20) << 7;                         /* 55 bits */
    limb[4] = (load_6(s + 26) & 0x7fffffffffff) << 4;      /* 51 bits */

    /* Push everything above bit 50 of each limb into the next one. */
    for (i = 0; i < 4; i++) {
        limb[i + 1] |= limb[i] >> 51;
        limb[i] &= MASK51;
    }

    for (i = 0; i < 5; i++) {
        h[i] = limb[i];
    }
}
/*
 * Serialise the field element h (five limbs, h[0] least significant, 51
 * bits of weight per limb) into the 32 little-endian bytes s[0..31] after
 * a final reduction step.
 */
static void fe51_tobytes(uint8_t *s, const fe51 h)
{
    uint64_t limb[5];
    uint64_t word[4];
    uint64_t q;
    int i, j;

    for (i = 0; i < 5; i++) {
        limb[i] = h[i];
    }

    /*
     * Compare to modulus: propagate the carry of (value + 19) through all
     * five limbs; q ends up as the carry out of the top limb.
     */
    q = 19;
    for (i = 0; i < 5; i++) {
        q = (limb[i] + q) >> 51;
    }

    /* Full reduce: fold 19 * q back in and normalise every limb to 51 bits. */
    limb[0] += 19 * q;
    for (i = 0; i < 4; i++) {
        limb[i + 1] += limb[i] >> 51;
        limb[i] &= MASK51;
    }
    limb[4] &= MASK51;

    /*
     * Smash: the limbs are now 51 bits each, so they can be concatenated
     * into four 64-bit words without any overlap...
     */
    word[0] = limb[0] | (limb[1] << 51);
    word[1] = (limb[1] >> 13) | (limb[2] << 38);
    word[2] = (limb[2] >> 26) | (limb[3] << 25);
    word[3] = (limb[3] >> 39) | (limb[4] << 12);

    /* ...and written out least significant byte first. */
    for (i = 0; i < 4; i++) {
        for (j = 0; j < 8; j++) {
            s[8 * i + j] = (uint8_t)(word[i] >> (8 * j));
        }
    }
}
# if defined(X25519_ASM)
/*
 * External fe51 primitives: only declared here, with no C body in this
 * section. Judging by their names they are multiplication, squaring and
 * multiplication by the constant 121666 - NOTE(review): presumably supplied
 * by the platform assembly module when X25519_ASM is defined; confirm.
 */
void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
void x25519_fe51_sqr(fe51 h, const fe51 f);
void x25519_fe51_mul121666(fe51 h, fe51 f);
/* Alias the generic fe51_* names to the external routines declared above. */
# define fe51_mul x25519_fe51_mul
# define fe51_sq x25519_fe51_sqr
# define fe51_mul121666 x25519_fe51_mul121666
# else
typedef __uint128_t u128;
static void fe51_mul(fe51 h, const fe51 f, const fe51 g)
{
u128 h0, h1, h2, h3, h4;