Manual formatting tweaks to Curve448 code
After running openssl-format-source, a lot of manual tweaks were required. Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de> (Merged from https://github.com/openssl/openssl/pull/5105)
This commit is contained in:
parent
205fd63881
commit
8d55f844b0
24 changed files with 1654 additions and 2709 deletions
|
@ -24,13 +24,11 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
|
|||
{
|
||||
const uint32_t *a = as->limb, *b = bs->limb;
|
||||
uint32_t *c = cs->limb;
|
||||
|
||||
uint64_t accum0 = 0, accum1 = 0, accum2 = 0;
|
||||
uint32_t mask = (1 << 28) - 1;
|
||||
|
||||
uint32_t aa[8], bb[8];
|
||||
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
aa[i] = a[i] + a[i + 8];
|
||||
bb[i] = b[i] + b[i + 8];
|
||||
|
|
|
@ -39,9 +39,8 @@ void gf_bias(gf a, int amt)
|
|||
unsigned int i;
|
||||
uint32_t co1 = ((1 << 28) - 1) * amt, co2 = co1 - amt;
|
||||
|
||||
for (i = 0; i < sizeof(*a) / sizeof(a->limb[0]); i++) {
|
||||
for (i = 0; i < sizeof(*a) / sizeof(a->limb[0]); i++)
|
||||
a->limb[i] += (i == sizeof(*a) / sizeof(a->limb[0]) / 2) ? co2 : co1;
|
||||
}
|
||||
}
|
||||
|
||||
void gf_weak_reduce(gf a)
|
||||
|
@ -51,8 +50,7 @@ void gf_weak_reduce(gf a)
|
|||
unsigned int i;
|
||||
|
||||
a->limb[8] += tmp;
|
||||
for (i = 15; i > 0; i--) {
|
||||
for (i = 15; i > 0; i--)
|
||||
a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 28);
|
||||
}
|
||||
a->limb[0] = (a->limb[0] & mask) + tmp;
|
||||
}
|
||||
|
|
|
@ -19,7 +19,8 @@ static __inline__ __attribute((always_inline, unused))
|
|||
uint32_t word_is_zero(uint32_t a)
|
||||
{
|
||||
uint32_t ret;
|
||||
asm("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
|
||||
|
||||
asm("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,9 +19,10 @@ static inline void __attribute__ ((gnu_inline, always_inline))
|
|||
#ifdef __ARMEL__
|
||||
uint32_t lo = *acc, hi = (*acc) >> 32;
|
||||
|
||||
__asm__ __volatile__("smlal %[lo], %[hi], %[a], %[b]":[lo] "+&r"(lo),
|
||||
[hi] "+&r"(hi)
|
||||
:[a] "r"(a),[b] "r"(b));
|
||||
__asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
|
||||
: [lo]"+&r"(lo), [hi]"+&r"(hi)
|
||||
: [a]"r"(a), [b]"r"(b));
|
||||
|
||||
|
||||
*acc = lo + (((uint64_t)hi) << 32);
|
||||
#else
|
||||
|
@ -35,9 +36,11 @@ static inline void __attribute__ ((gnu_inline, always_inline))
|
|||
#ifdef __ARMEL__
|
||||
uint32_t lo = *acc, hi = (*acc) >> 32;
|
||||
|
||||
__asm__ __volatile__("smlal %[lo], %[hi], %[a], %[b]":[lo] "+&r"(lo),
|
||||
[hi] "+&r"(hi)
|
||||
:[a] "r"(a),[b] "r"(2 * b));
|
||||
__asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
|
||||
: [lo]"+&r"(lo), [hi]"+&r"(hi)
|
||||
: [a]"r"(a), [b]"r"(2 * b));
|
||||
|
||||
|
||||
|
||||
*acc = lo + (((uint64_t)hi) << 32);
|
||||
#else
|
||||
|
@ -51,9 +54,9 @@ static inline void __attribute__ ((gnu_inline, always_inline))
|
|||
#ifdef __ARMEL__
|
||||
uint32_t lo, hi;
|
||||
|
||||
__asm__ __volatile__("smull %[lo], %[hi], %[a], %[b]":[lo] "=&r"(lo),
|
||||
[hi] "=&r"(hi)
|
||||
:[a] "r"(a),[b] "r"(b));
|
||||
__asm__ __volatile__ ("smull %[lo], %[hi], %[a], %[b]"
|
||||
: [lo]"=&r"(lo), [hi]"=&r"(hi)
|
||||
: [a]"r"(a), [b]"r"(b));
|
||||
|
||||
*acc = lo + (((uint64_t)hi) << 32);
|
||||
#else
|
||||
|
@ -68,8 +71,8 @@ static inline void __attribute__ ((gnu_inline, always_inline))
|
|||
uint32_t lo, hi;
|
||||
|
||||
__asm__ /*__volatile__*/ ("smull %[lo], %[hi], %[a], %[b]"
|
||||
: [lo] "=&r"(lo),[hi] "=&r"(hi)
|
||||
: [a] "r"(a),[b] "r"(2 * b));
|
||||
: [lo]"=&r"(lo), [hi]"=&r"(hi)
|
||||
: [a]"r"(a), [b]"r"(2*b));
|
||||
|
||||
*acc = lo + (((uint64_t)hi) << 32);
|
||||
#else
|
||||
|
@ -729,16 +732,14 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
|
|||
void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
|
||||
{
|
||||
uint32_t mask = (1ull << 28) - 1;
|
||||
assert(b <= mask);
|
||||
|
||||
const uint32_t *a = as->limb;
|
||||
uint32_t *c = cs->limb;
|
||||
|
||||
uint64_t accum0, accum8;
|
||||
|
||||
int i;
|
||||
|
||||
uint32_t c0, c8, n0, n8;
|
||||
|
||||
assert(b <= mask);
|
||||
|
||||
c0 = a[0];
|
||||
c8 = a[8];
|
||||
accum0 = widemul(b, c0);
|
||||
|
|
|
@ -23,10 +23,6 @@ void gf_add_RAW(gf out, const gf a, const gf b)
|
|||
((uint32xn_t *) out)[i] =
|
||||
((const uint32xn_t *)a)[i] + ((const uint32xn_t *)b)[i];
|
||||
}
|
||||
/*
|
||||
* for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
|
||||
* out->limb[i] = a->limb[i] + b->limb[i]; }
|
||||
*/
|
||||
}
|
||||
|
||||
void gf_sub_RAW(gf out, const gf a, const gf b)
|
||||
|
@ -35,10 +31,6 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
|
|||
((uint32xn_t *) out)[i] =
|
||||
((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i];
|
||||
}
|
||||
/*
|
||||
* for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
|
||||
* out->limb[i] = a->limb[i] - b->limb[i]; }
|
||||
*/
|
||||
}
|
||||
|
||||
void gf_bias(gf a, int amt)
|
||||
|
@ -47,6 +39,7 @@ void gf_bias(gf a, int amt)
|
|||
uint32x4_t lo = { co1, co1, co1, co1 }, hi = {
|
||||
co2, co1, co1, co1};
|
||||
uint32x4_t *aa = (uint32x4_t *) a;
|
||||
|
||||
aa[0] += lo;
|
||||
aa[1] += lo;
|
||||
aa[2] += hi;
|
||||
|
@ -57,6 +50,7 @@ void gf_weak_reduce(gf a)
|
|||
{
|
||||
uint64_t mask = (1ull << 28) - 1;
|
||||
uint64_t tmp = a->limb[15] >> 28;
|
||||
|
||||
a->limb[8] += tmp;
|
||||
for (unsigned int i = 15; i > 0; i--) {
|
||||
a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 28);
|
||||
|
|
|
@ -19,7 +19,7 @@ static __inline__ __attribute((always_inline, unused))
|
|||
uint32_t word_is_zero(uint32_t a)
|
||||
{
|
||||
uint32_t ret;
|
||||
__asm__("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
|
||||
__asm__("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -15,11 +15,13 @@
|
|||
#define USE_NEON_PERM 1
|
||||
#define LIMBHI(x) ((x##ull)>>28)
|
||||
#define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
|
||||
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
|
||||
{{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
|
||||
LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
|
||||
LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
|
||||
LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}}
|
||||
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) { \
|
||||
{ \
|
||||
LIMBLO(a), LIMBLO(e), LIMBHI(a), LIMBHI(e), LIMBLO(b), LIMBLO(f), \
|
||||
LIMBHI(b), LIMBHI(f), LIMBLO(c), LIMBLO(g), LIMBHI(c), LIMBHI(g), \
|
||||
LIMBLO(d), LIMBLO(h), LIMBHI(d), LIMBHI(h) \
|
||||
} \
|
||||
}
|
||||
|
||||
#define LIMB_PLACE_VALUE(i) 28
|
||||
|
||||
|
@ -37,17 +39,13 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
|
|||
((uint32xn_t *) out)[i] =
|
||||
((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i];
|
||||
}
|
||||
/*
|
||||
* unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
|
||||
* out->limb[i] = a->limb[i] - b->limb[i]; }
|
||||
*/
|
||||
}
|
||||
|
||||
void gf_bias(gf a, int amt)
|
||||
{
|
||||
uint32_t co1 = ((1ull << 28) - 1) * amt, co2 = co1 - amt;
|
||||
uint32x4_t lo = { co1, co2, co1, co1 }, hi = {
|
||||
co1, co1, co1, co1};
|
||||
uint32x4_t lo = { co1, co2, co1, co1 };
|
||||
uint32x4_t hi = { co1, co1, co1, co1 };
|
||||
uint32x4_t *aa = (uint32x4_t *) a;
|
||||
aa[0] += lo;
|
||||
aa[1] += hi;
|
||||
|
@ -57,13 +55,11 @@ void gf_bias(gf a, int amt)
|
|||
|
||||
void gf_weak_reduce(gf a)
|
||||
{
|
||||
uint32x2_t *aa = (uint32x2_t *) a;
|
||||
uint32x2_t vmask = { (1ull << 28) - 1, (1ull << 28) - 1};
|
||||
uint32x2_t vm2 = { 0, -1}, tmp = vshr_n_u32(aa[7], 28);
|
||||
|
||||
uint32x2_t *aa = (uint32x2_t *) a, vmask = {
|
||||
(1ull << 28) - 1, (1ull << 28) - 1}, vm2 = {
|
||||
0, -1}, tmp = vshr_n_u32(aa[7], 28);
|
||||
|
||||
for (unsigned int i = 7; i >= 1; i--) {
|
||||
for (unsigned int i = 7; i >= 1; i--)
|
||||
aa[i] = vsra_n_u32(aa[i] & vmask, aa[i - 1], 28);
|
||||
}
|
||||
aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp & vm2);
|
||||
}
|
||||
|
|
|
@ -15,13 +15,11 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
|
|||
{
|
||||
const uint64_t *a = as->limb, *b = bs->limb;
|
||||
uint64_t *c = cs->limb;
|
||||
|
||||
__uint128_t accum0 = 0, accum1 = 0, accum2;
|
||||
uint64_t mask = (1ull << 56) - 1;
|
||||
|
||||
uint64_t aa[4], bb[4], bbb[4];
|
||||
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
aa[i] = a[i] + a[i + 4];
|
||||
bb[i] = b[i] + b[i + 4];
|
||||
|
@ -177,11 +175,10 @@ void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
|
|||
{
|
||||
const uint64_t *a = as->limb;
|
||||
uint64_t *c = cs->limb;
|
||||
|
||||
__uint128_t accum0 = 0, accum4 = 0;
|
||||
uint64_t mask = (1ull << 56) - 1;
|
||||
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
accum0 += widemul(b, a[i]);
|
||||
accum4 += widemul(b, a[i + 4]);
|
||||
|
@ -204,10 +201,8 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
|
|||
{
|
||||
const uint64_t *a = as->limb;
|
||||
uint64_t *c = cs->limb;
|
||||
|
||||
__uint128_t accum0 = 0, accum1 = 0, accum2;
|
||||
uint64_t mask = (1ull << 56) - 1;
|
||||
|
||||
uint64_t aa[4];
|
||||
|
||||
/* For some reason clang doesn't vectorize this without prompting? */
|
||||
|
|
|
@ -17,18 +17,17 @@
|
|||
|
||||
void gf_add_RAW(gf out, const gf a, const gf b)
|
||||
{
|
||||
for (unsigned int i = 0; i < 8; i++) {
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
out->limb[i] = a->limb[i] + b->limb[i];
|
||||
}
|
||||
gf_weak_reduce(out);
|
||||
}
|
||||
|
||||
void gf_sub_RAW(gf out, const gf a, const gf b)
|
||||
{
|
||||
uint64_t co1 = ((1ull << 56) - 1) * 2, co2 = co1 - 2;
|
||||
for (unsigned int i = 0; i < 8; i++) {
|
||||
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
out->limb[i] = a->limb[i] - b->limb[i] + ((i == 4) ? co2 : co1);
|
||||
}
|
||||
gf_weak_reduce(out);
|
||||
}
|
||||
|
||||
|
@ -42,9 +41,9 @@ void gf_weak_reduce(gf a)
|
|||
{
|
||||
uint64_t mask = (1ull << 56) - 1;
|
||||
uint64_t tmp = a->limb[7] >> 56;
|
||||
|
||||
a->limb[4] += tmp;
|
||||
for (unsigned int i = 7; i > 0; i--) {
|
||||
for (unsigned int i = 7; i > 0; i--)
|
||||
a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);
|
||||
}
|
||||
a->limb[0] = (a->limb[0] & mask) + tmp;
|
||||
}
|
||||
|
|
|
@ -10,287 +10,324 @@
|
|||
* Originally written by Mike Hamburg
|
||||
*/
|
||||
#ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
|
||||
# define __ARCH_X86_64_ARCH_INTRINSICS_H__
|
||||
#define __ARCH_X86_64_ARCH_INTRINSICS_H__
|
||||
|
||||
# define ARCH_WORD_BITS 64
|
||||
#define ARCH_WORD_BITS 64
|
||||
|
||||
# include <openssl/e_os2.h>
|
||||
#include <openssl/e_os2.h>
|
||||
|
||||
/* FUTURE: autogenerate */
|
||||
static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b)
|
||||
{
|
||||
uint64_t c, d;
|
||||
# ifndef __BMI2__
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rdx");
|
||||
# endif
|
||||
return (((__uint128_t) (d)) << 64) | c;
|
||||
uint64_t c, d;
|
||||
|
||||
#ifndef __BMI2__
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax;"
|
||||
"mulq %[b];"
|
||||
: [c]"=&a"(c), [d]"=d"(d)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx;"
|
||||
"mulx %[b], %[c], %[d];"
|
||||
: [c]"=r"(c), [d]"=r"(d)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rdx");
|
||||
#endif
|
||||
return (((__uint128_t)(d)) << 64) | c;
|
||||
}
|
||||
|
||||
static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b)
|
||||
{
|
||||
uint64_t c, d;
|
||||
# ifndef __BMI2__
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
|
||||
:[b] "m"(*b),[a] "r"(a)
|
||||
:"cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
|
||||
:[b] "m"(*b),[a] "d"(a));
|
||||
# endif
|
||||
return (((__uint128_t) (d)) << 64) | c;
|
||||
uint64_t c, d;
|
||||
|
||||
#ifndef __BMI2__
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax;"
|
||||
"mulq %[b];"
|
||||
: [c]"=&a"(c), [d]"=d"(d)
|
||||
: [b]"m"(*b), [a]"r"(a)
|
||||
: "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("mulx %[b], %[c], %[d];"
|
||||
: [c]"=r"(c), [d]"=r"(d)
|
||||
: [b]"m"(*b), [a]"d"(a));
|
||||
#endif
|
||||
return (((__uint128_t)(d)) << 64) | c;
|
||||
}
|
||||
|
||||
static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b)
|
||||
{
|
||||
uint64_t c, d;
|
||||
# ifndef __BMI2__
|
||||
__asm__ volatile
|
||||
("mulq %[b];":[c] "=a"(c),[d] "=d"(d)
|
||||
:[b] "r"(b), "a"(a)
|
||||
:"cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
|
||||
:[b] "r"(b),[a] "d"(a));
|
||||
# endif
|
||||
return (((__uint128_t) (d)) << 64) | c;
|
||||
uint64_t c, d;
|
||||
|
||||
#ifndef __BMI2__
|
||||
__asm__ volatile
|
||||
("mulq %[b];"
|
||||
: [c]"=a"(c), [d]"=d"(d)
|
||||
: [b]"r"(b), "a"(a)
|
||||
: "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("mulx %[b], %[c], %[d];"
|
||||
: [c]"=r"(c), [d]"=r"(d)
|
||||
: [b]"r"(b), [a]"d"(a));
|
||||
#endif
|
||||
return (((__uint128_t)(d)) << 64) | c;
|
||||
}
|
||||
|
||||
static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b)
|
||||
{
|
||||
uint64_t c, d;
|
||||
# ifndef __BMI2__
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"addq %%rax, %%rax; " "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx;"
|
||||
"leaq (,%%rdx,2), %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rdx");
|
||||
# endif
|
||||
return (((__uint128_t) (d)) << 64) | c;
|
||||
uint64_t c, d;
|
||||
|
||||
#ifndef __BMI2__
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"addq %%rax, %%rax; "
|
||||
"mulq %[b];"
|
||||
: [c]"=&a"(c), [d]"=d"(d)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx;"
|
||||
"leaq (,%%rdx,2), %%rdx;"
|
||||
"mulx %[b], %[c], %[d];"
|
||||
: [c]"=r"(c), [d]"=r"(d)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rdx");
|
||||
#endif
|
||||
return (((__uint128_t)(d)) << 64) | c;
|
||||
}
|
||||
|
||||
static __inline__ void mac(__uint128_t * acc, const uint64_t *a,
|
||||
static __inline__ void mac(__uint128_t *acc, const uint64_t *a,
|
||||
const uint64_t *b)
|
||||
{
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
# ifdef __BMI2__
|
||||
uint64_t c, d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; ":[c] "=&r"(c),[d] "=&r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rdx", "cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"mulq %[b]; "
|
||||
"addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rax", "rdx", "cc");
|
||||
# endif
|
||||
|
||||
*acc = (((__uint128_t) (hi)) << 64) | lo;
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
#ifdef __BMI2__
|
||||
uint64_t c,d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; "
|
||||
: [c]"=&r"(c), [d]"=&r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rdx", "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"mulq %[b]; "
|
||||
"addq %%rax, %[lo]; "
|
||||
"adcq %%rdx, %[hi]; "
|
||||
: [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rax", "rdx", "cc");
|
||||
#endif
|
||||
|
||||
*acc = (((__uint128_t)(hi)) << 64) | lo;
|
||||
}
|
||||
|
||||
static __inline__ void macac(__uint128_t * acc, __uint128_t * acc2,
|
||||
static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2,
|
||||
const uint64_t *a, const uint64_t *b)
|
||||
{
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
uint64_t lo2 = *acc2, hi2 = *acc2 >> 64;
|
||||
|
||||
# ifdef __BMI2__
|
||||
uint64_t c, d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; "
|
||||
"addq %[c], %[lo2]; "
|
||||
"adcq %[d], %[hi2]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi),
|
||||
[lo2] "+r"(lo2),[hi2] "+r"(hi2)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rdx", "cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"mulq %[b]; "
|
||||
"addq %%rax, %[lo]; "
|
||||
"adcq %%rdx, %[hi]; "
|
||||
"addq %%rax, %[lo2]; "
|
||||
"adcq %%rdx, %[hi2]; ":[lo] "+r"(lo),[hi] "+r"(hi),[lo2] "+r"(lo2),
|
||||
[hi2] "+r"(hi2)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rax", "rdx", "cc");
|
||||
# endif
|
||||
|
||||
*acc = (((__uint128_t) (hi)) << 64) | lo;
|
||||
*acc2 = (((__uint128_t) (hi2)) << 64) | lo2;
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
uint64_t lo2 = *acc2, hi2 = *acc2 >> 64;
|
||||
|
||||
#ifdef __BMI2__
|
||||
uint64_t c,d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; "
|
||||
"addq %[c], %[lo2]; "
|
||||
"adcq %[d], %[hi2]; "
|
||||
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rdx", "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"mulq %[b]; "
|
||||
"addq %%rax, %[lo]; "
|
||||
"adcq %%rdx, %[hi]; "
|
||||
"addq %%rax, %[lo2]; "
|
||||
"adcq %%rdx, %[hi2]; "
|
||||
: [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rax", "rdx", "cc");
|
||||
#endif
|
||||
|
||||
*acc = (((__uint128_t)(hi)) << 64) | lo;
|
||||
*acc2 = (((__uint128_t)(hi2)) << 64) | lo2;
|
||||
}
|
||||
|
||||
static __inline__ void mac_rm(__uint128_t * acc, uint64_t a, const uint64_t *b)
|
||||
static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b)
|
||||
{
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
# ifdef __BMI2__
|
||||
uint64_t c, d;
|
||||
__asm__ volatile
|
||||
("mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "d"(a)
|
||||
:"cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"mulq %[b]; "
|
||||
"addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "r"(a)
|
||||
:"rax", "rdx", "cc");
|
||||
# endif
|
||||
|
||||
*acc = (((__uint128_t) (hi)) << 64) | lo;
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
#ifdef __BMI2__
|
||||
uint64_t c,d;
|
||||
__asm__ volatile
|
||||
("mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; "
|
||||
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"d"(a)
|
||||
: "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"mulq %[b]; "
|
||||
"addq %%rax, %[lo]; "
|
||||
"adcq %%rdx, %[hi]; "
|
||||
: [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"r"(a)
|
||||
: "rax", "rdx", "cc");
|
||||
#endif
|
||||
|
||||
*acc = (((__uint128_t)(hi)) << 64) | lo;
|
||||
}
|
||||
|
||||
static __inline__ void mac_rr(__uint128_t * acc, uint64_t a, const uint64_t b)
|
||||
static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b)
|
||||
{
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
# ifdef __BMI2__
|
||||
uint64_t c, d;
|
||||
__asm__ volatile
|
||||
("mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "r"(b),[a] "d"(a)
|
||||
:"cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("mulq %[b]; "
|
||||
"addq %%rax, %[lo]; "
|
||||
"adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi), "+a"(a)
|
||||
:[b] "r"(b)
|
||||
:"rdx", "cc");
|
||||
# endif
|
||||
|
||||
*acc = (((__uint128_t) (hi)) << 64) | lo;
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
#ifdef __BMI2__
|
||||
uint64_t c,d;
|
||||
__asm__ volatile
|
||||
("mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; "
|
||||
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"r"(b), [a]"d"(a)
|
||||
: "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("mulq %[b]; "
|
||||
"addq %%rax, %[lo]; "
|
||||
"adcq %%rdx, %[hi]; "
|
||||
: [lo]"+r"(lo), [hi]"+r"(hi), "+a"(a)
|
||||
: [b]"r"(b)
|
||||
: "rdx", "cc");
|
||||
#endif
|
||||
|
||||
*acc = (((__uint128_t)(hi)) << 64) | lo;
|
||||
}
|
||||
|
||||
static __inline__ void mac2(__uint128_t * acc, const uint64_t *a,
|
||||
static __inline__ void mac2(__uint128_t *acc, const uint64_t *a,
|
||||
const uint64_t *b)
|
||||
{
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
# ifdef __BMI2__
|
||||
uint64_t c, d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"addq %%rdx, %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rdx", "cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"addq %%rax, %%rax; "
|
||||
"mulq %[b]; "
|
||||
"addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rax", "rdx", "cc");
|
||||
# endif
|
||||
|
||||
*acc = (((__uint128_t) (hi)) << 64) | lo;
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
#ifdef __BMI2__
|
||||
uint64_t c,d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"addq %%rdx, %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"addq %[c], %[lo]; "
|
||||
"adcq %[d], %[hi]; "
|
||||
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rdx", "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"addq %%rax, %%rax; "
|
||||
"mulq %[b]; "
|
||||
"addq %%rax, %[lo]; "
|
||||
"adcq %%rdx, %[hi]; "
|
||||
: [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rax", "rdx", "cc");
|
||||
#endif
|
||||
|
||||
*acc = (((__uint128_t)(hi)) << 64) | lo;
|
||||
}
|
||||
|
||||
static __inline__ void msb(__uint128_t * acc, const uint64_t *a,
|
||||
static __inline__ void msb(__uint128_t *acc, const uint64_t *a,
|
||||
const uint64_t *b)
|
||||
{
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
# ifdef __BMI2__
|
||||
uint64_t c, d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"subq %[c], %[lo]; "
|
||||
"sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rdx", "cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"mulq %[b]; "
|
||||
"subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rax", "rdx", "cc");
|
||||
# endif
|
||||
*acc = (((__uint128_t) (hi)) << 64) | lo;
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
#ifdef __BMI2__
|
||||
uint64_t c,d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"subq %[c], %[lo]; "
|
||||
"sbbq %[d], %[hi]; "
|
||||
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rdx", "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"mulq %[b]; "
|
||||
"subq %%rax, %[lo]; "
|
||||
"sbbq %%rdx, %[hi]; "
|
||||
: [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rax", "rdx", "cc");
|
||||
#endif
|
||||
*acc = (((__uint128_t)(hi)) << 64) | lo;
|
||||
}
|
||||
|
||||
static __inline__ void msb2(__uint128_t * acc, const uint64_t *a,
|
||||
static __inline__ void msb2(__uint128_t *acc, const uint64_t *a,
|
||||
const uint64_t *b)
|
||||
{
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
# ifdef __BMI2__
|
||||
uint64_t c, d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"addq %%rdx, %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"subq %[c], %[lo]; "
|
||||
"sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rdx", "cc");
|
||||
# else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"addq %%rax, %%rax; "
|
||||
"mulq %[b]; "
|
||||
"subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rax", "rdx", "cc");
|
||||
# endif
|
||||
*acc = (((__uint128_t) (hi)) << 64) | lo;
|
||||
uint64_t lo = *acc, hi = *acc >> 64;
|
||||
|
||||
#ifdef __BMI2__
|
||||
uint64_t c,d;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"addq %%rdx, %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"subq %[c], %[lo]; "
|
||||
"sbbq %[d], %[hi]; "
|
||||
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rdx", "cc");
|
||||
#else
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rax; "
|
||||
"addq %%rax, %%rax; "
|
||||
"mulq %[b]; "
|
||||
"subq %%rax, %[lo]; "
|
||||
"sbbq %%rdx, %[hi]; "
|
||||
: [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rax", "rdx", "cc");
|
||||
#endif
|
||||
*acc = (((__uint128_t)(hi))<<64) | lo;
|
||||
|
||||
}
|
||||
|
||||
static __inline__ void mrs(__uint128_t * acc, const uint64_t *a,
|
||||
static __inline__ void mrs(__uint128_t *acc, const uint64_t *a,
|
||||
const uint64_t *b)
|
||||
{
|
||||
uint64_t c, d, lo = *acc, hi = *acc >> 64;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"subq %[lo], %[c]; "
|
||||
"sbbq %[hi], %[d]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
|
||||
:[b] "m"(*b),[a] "m"(*a)
|
||||
:"rdx", "cc");
|
||||
*acc = (((__uint128_t) (d)) << 64) | c;
|
||||
uint64_t c,d, lo = *acc, hi = *acc >> 64;
|
||||
__asm__ volatile
|
||||
("movq %[a], %%rdx; "
|
||||
"mulx %[b], %[c], %[d]; "
|
||||
"subq %[lo], %[c]; "
|
||||
"sbbq %[hi], %[d]; "
|
||||
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
|
||||
: [b]"m"(*b), [a]"m"(*a)
|
||||
: "rdx", "cc");
|
||||
*acc = (((__uint128_t)(d)) << 64) | c;
|
||||
}
|
||||
|
||||
static __inline__ uint64_t word_is_zero(uint64_t x)
|
||||
{
|
||||
__asm__ volatile ("neg %0; sbb %0, %0;":"+r" (x));
|
||||
return ~x;
|
||||
__asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x));
|
||||
return ~x;
|
||||
}
|
||||
|
||||
static inline uint64_t shrld(__uint128_t x, int n)
|
||||
|
@ -298,4 +335,4 @@ static inline uint64_t shrld(__uint128_t x, int n)
|
|||
return x >> n;
|
||||
}
|
||||
|
||||
#endif /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */
|
||||
#endif /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */
|
||||
|
|
|
@ -16,10 +16,8 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
|
|||
{
|
||||
const uint64_t *a = as->limb, *b = bs->limb;
|
||||
uint64_t *c = cs->limb;
|
||||
|
||||
__uint128_t accum0 = 0, accum1 = 0, accum2;
|
||||
uint64_t mask = (1ull << 56) - 1;
|
||||
|
||||
uint64_t aa[4] VECTOR_ALIGNED, bb[4] VECTOR_ALIGNED, bbb[4] VECTOR_ALIGNED;
|
||||
|
||||
/* For some reason clang doesn't vectorize this without prompting? */
|
||||
|
@ -202,10 +200,8 @@ void gf_sqr(gf_s * __restrict__ cs, const gf as)
|
|||
{
|
||||
const uint64_t *a = as->limb;
|
||||
uint64_t *c = cs->limb;
|
||||
|
||||
__uint128_t accum0 = 0, accum1 = 0, accum2;
|
||||
uint64_t mask = (1ull << 56) - 1;
|
||||
|
||||
uint64_t aa[4] VECTOR_ALIGNED;
|
||||
|
||||
/* For some reason clang doesn't vectorize this without prompting? */
|
||||
|
|
|
@ -20,10 +20,6 @@ void gf_add_RAW(gf out, const gf a, const gf b)
|
|||
((uint64xn_t *) out)[i] =
|
||||
((const uint64xn_t *)a)[i] + ((const uint64xn_t *)b)[i];
|
||||
}
|
||||
/*
|
||||
* unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
|
||||
* out->limb[i] = a->limb[i] + b->limb[i]; }
|
||||
*/
|
||||
}
|
||||
|
||||
void gf_sub_RAW(gf out, const gf a, const gf b)
|
||||
|
@ -32,10 +28,6 @@ void gf_sub_RAW(gf out, const gf a, const gf b)
|
|||
((uint64xn_t *) out)[i] =
|
||||
((const uint64xn_t *)a)[i] - ((const uint64xn_t *)b)[i];
|
||||
}
|
||||
/*
|
||||
* unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
|
||||
* out->limb[i] = a->limb[i] - b->limb[i]; }
|
||||
*/
|
||||
}
|
||||
|
||||
void gf_bias(gf a, int amt)
|
||||
|
@ -68,6 +60,7 @@ void gf_weak_reduce(gf a)
|
|||
/* PERF: use pshufb/palignr if anyone cares about speed of this */
|
||||
uint64_t mask = (1ull << 56) - 1;
|
||||
uint64_t tmp = a->limb[7] >> 56;
|
||||
|
||||
a->limb[4] += tmp;
|
||||
for (unsigned int i = 7; i > 0; i--) {
|
||||
a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);
|
||||
|
|
|
@ -36,32 +36,32 @@
|
|||
* Instead, we're putting our trust in the loop unroller and unswitcher.
|
||||
*/
|
||||
|
||||
/**
|
||||
/*
|
||||
* Unaligned big (vector?) register.
|
||||
*/
|
||||
typedef struct {
|
||||
big_register_t unaligned;
|
||||
} __attribute__ ((packed)) unaligned_br_t;
|
||||
|
||||
/**
|
||||
/*
|
||||
* Unaligned word register, for architectures where that matters.
|
||||
*/
|
||||
typedef struct {
|
||||
word_t unaligned;
|
||||
} __attribute__ ((packed)) unaligned_word_t;
|
||||
|
||||
/**
|
||||
* @brief Constant-time conditional swap.
|
||||
/*
|
||||
* Constant-time conditional swap.
|
||||
*
|
||||
* If doswap, then swap elem_bytes between *a and *b.
|
||||
*
|
||||
* *a and *b must not alias. Also, they must be at least as aligned
|
||||
* as their sizes, if the CPU cares about that sort of thing.
|
||||
*/
|
||||
static __inline__ void
|
||||
__attribute__ ((unused, always_inline))
|
||||
constant_time_cond_swap(void *__restrict__ a_,
|
||||
void *__restrict__ b_, word_t elem_bytes, mask_t doswap)
|
||||
static ossl_inline void constant_time_cond_swap(void *__restrict__ a_,
|
||||
void *__restrict__ b_,
|
||||
word_t elem_bytes,
|
||||
mask_t doswap)
|
||||
{
|
||||
word_t k;
|
||||
unsigned char *a = (unsigned char *)a_;
|
||||
|
@ -72,19 +72,19 @@ static __inline__ void
|
|||
k += sizeof(big_register_t)) {
|
||||
if (elem_bytes % sizeof(big_register_t)) {
|
||||
/* unaligned */
|
||||
big_register_t xor =
|
||||
((unaligned_br_t *) (&a[k]))->unaligned
|
||||
^ ((unaligned_br_t *) (&b[k]))->unaligned;
|
||||
big_register_t xor = ((unaligned_br_t *) (&a[k]))->unaligned
|
||||
^ ((unaligned_br_t *) (&b[k]))->unaligned;
|
||||
|
||||
xor &= br_mask;
|
||||
((unaligned_br_t *) (&a[k]))->unaligned ^= xor;
|
||||
((unaligned_br_t *) (&b[k]))->unaligned ^= xor;
|
||||
((unaligned_br_t *)(&a[k]))->unaligned ^= xor;
|
||||
((unaligned_br_t *)(&b[k]))->unaligned ^= xor;
|
||||
} else {
|
||||
/* aligned */
|
||||
big_register_t xor = *((big_register_t *) (&a[k]))
|
||||
^ *((big_register_t *) (&b[k]));
|
||||
^ *((big_register_t *) (&b[k]));
|
||||
xor &= br_mask;
|
||||
*((big_register_t *) (&a[k])) ^= xor;
|
||||
*((big_register_t *) (&b[k])) ^= xor;
|
||||
*((big_register_t *)(&a[k])) ^= xor;
|
||||
*((big_register_t *)(&b[k])) ^= xor;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -92,19 +92,18 @@ static __inline__ void
|
|||
for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
|
||||
if (elem_bytes % sizeof(word_t)) {
|
||||
/* unaligned */
|
||||
word_t xor =
|
||||
((unaligned_word_t *) (&a[k]))->unaligned
|
||||
^ ((unaligned_word_t *) (&b[k]))->unaligned;
|
||||
word_t xor = ((unaligned_word_t *)(&a[k]))->unaligned
|
||||
^ ((unaligned_word_t *)(&b[k]))->unaligned;
|
||||
|
||||
xor &= doswap;
|
||||
((unaligned_word_t *) (&a[k]))->unaligned ^= xor;
|
||||
((unaligned_word_t *) (&b[k]))->unaligned ^= xor;
|
||||
((unaligned_word_t *)(&a[k]))->unaligned ^= xor;
|
||||
((unaligned_word_t *)(&b[k]))->unaligned ^= xor;
|
||||
} else {
|
||||
/* aligned */
|
||||
word_t xor = *((word_t *) (&a[k]))
|
||||
^ *((word_t *) (&b[k]));
|
||||
word_t xor = *((word_t *) (&a[k])) ^ *((word_t *) (&b[k]));
|
||||
xor &= doswap;
|
||||
*((word_t *) (&a[k])) ^= xor;
|
||||
*((word_t *) (&b[k])) ^= xor;
|
||||
*((word_t *)(&a[k])) ^= xor;
|
||||
*((word_t *)(&b[k])) ^= xor;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -112,6 +111,7 @@ static __inline__ void
|
|||
if (elem_bytes % sizeof(word_t)) {
|
||||
for (; k < elem_bytes; k += 1) {
|
||||
unsigned char xor = a[k] ^ b[k];
|
||||
|
||||
xor &= doswap;
|
||||
a[k] ^= xor;
|
||||
b[k] ^= xor;
|
||||
|
@ -119,23 +119,23 @@ static __inline__ void
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
|
||||
/*
|
||||
* Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
|
||||
*
|
||||
* The table must be at least as aligned as elem_bytes. The output must be word aligned,
|
||||
* and if the input size is vector aligned it must also be vector aligned.
|
||||
*
|
||||
* The table and output must not alias.
|
||||
*/
|
||||
static __inline__ void
|
||||
__attribute__ ((unused, always_inline))
|
||||
constant_time_lookup(void *__restrict__ out_,
|
||||
const void *table_,
|
||||
word_t elem_bytes, word_t n_table, word_t idx)
|
||||
static ossl_inline void constant_time_lookup(void *__restrict__ out_,
|
||||
const void *table_,
|
||||
word_t elem_bytes,
|
||||
word_t n_table,
|
||||
word_t idx)
|
||||
{
|
||||
big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
|
||||
|
||||
/* Can't do pointer arithmetic on void* */
|
||||
/* Can't do pointer arithmetic on void * */
|
||||
unsigned char *out = (unsigned char *)out_;
|
||||
const unsigned char *table = (const unsigned char *)table_;
|
||||
word_t j, k;
|
||||
|
@ -149,16 +149,15 @@ static __inline__ void
|
|||
k += sizeof(big_register_t)) {
|
||||
if (elem_bytes % sizeof(big_register_t)) {
|
||||
/* unaligned */
|
||||
((unaligned_br_t *) (out + k))->unaligned
|
||||
|=
|
||||
br_mask &
|
||||
((const unaligned_br_t
|
||||
*)(&table[k + j * elem_bytes]))->unaligned;
|
||||
((unaligned_br_t *)(out + k))->unaligned |=
|
||||
br_mask
|
||||
& ((const unaligned_br_t *)
|
||||
(&table[k + j * elem_bytes]))->unaligned;
|
||||
} else {
|
||||
/* aligned */
|
||||
*(big_register_t *) (out + k) |=
|
||||
br_mask & *(const big_register_t
|
||||
*)(&table[k + j * elem_bytes]);
|
||||
*(big_register_t *)(out + k) |=
|
||||
br_mask
|
||||
& *(const big_register_t *)(&table[k + j * elem_bytes]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -167,14 +166,15 @@ static __inline__ void
|
|||
for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
|
||||
if (elem_bytes % sizeof(word_t)) {
|
||||
/* input unaligned, output aligned */
|
||||
*(word_t *) (out + k) |=
|
||||
mask &
|
||||
((const unaligned_word_t
|
||||
*)(&table[k + j * elem_bytes]))->unaligned;
|
||||
*(word_t *)(out + k) |=
|
||||
mask
|
||||
& ((const unaligned_word_t *)
|
||||
(&table[k + j * elem_bytes]))->unaligned;
|
||||
} else {
|
||||
/* aligned */
|
||||
*(word_t *) (out + k) |=
|
||||
mask & *(const word_t *)(&table[k + j * elem_bytes]);
|
||||
*(word_t *)(out + k) |=
|
||||
mask
|
||||
& *(const word_t *)(&table[k + j * elem_bytes]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -187,8 +187,8 @@ static __inline__ void
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Constant-time a = mask ? bTrue : bFalse.
|
||||
/*
|
||||
* Constant-time a = mask ? bTrue : bFalse.
|
||||
*
|
||||
* The input and output must be at least as aligned as alignment_bytes
|
||||
* or their size, whichever is smaller.
|
||||
|
@ -196,12 +196,12 @@ static __inline__ void
|
|||
* Note that the output is not __restrict__, but if it overlaps either
|
||||
* input, it must be equal and not partially overlap.
|
||||
*/
|
||||
static __inline__ void
|
||||
__attribute__ ((unused, always_inline))
|
||||
constant_time_select(void *a_,
|
||||
const void *bFalse_,
|
||||
const void *bTrue_,
|
||||
word_t elem_bytes, mask_t mask, size_t alignment_bytes)
|
||||
static ossl_inline void constant_time_select(void *a_,
|
||||
const void *bFalse_,
|
||||
const void *bTrue_,
|
||||
word_t elem_bytes,
|
||||
mask_t mask,
|
||||
size_t alignment_bytes)
|
||||
{
|
||||
unsigned char *a = (unsigned char *)a_;
|
||||
const unsigned char *bTrue = (const unsigned char *)bTrue_;
|
||||
|
@ -215,15 +215,15 @@ static __inline__ void
|
|||
k += sizeof(big_register_t)) {
|
||||
if (alignment_bytes % sizeof(big_register_t)) {
|
||||
/* unaligned */
|
||||
((unaligned_br_t *) (&a[k]))->unaligned =
|
||||
(br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
|
||||
| (~br_mask &
|
||||
((const unaligned_br_t *)(&bFalse[k]))->unaligned);
|
||||
((unaligned_br_t *)(&a[k]))->unaligned =
|
||||
(br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
|
||||
| (~br_mask
|
||||
& ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
|
||||
} else {
|
||||
/* aligned */
|
||||
*(big_register_t *) (a + k) =
|
||||
(br_mask & *(const big_register_t *)(&bTrue[k]))
|
||||
| (~br_mask & *(const big_register_t *)(&bFalse[k]));
|
||||
(br_mask & *(const big_register_t *)(&bTrue[k]))
|
||||
| (~br_mask & *(const big_register_t *)(&bFalse[k]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -28,16 +28,13 @@
|
|||
#define DECAF_WNAF_VAR_TABLE_BITS 3
|
||||
|
||||
static const int EDWARDS_D = -39081;
|
||||
static const curve448_scalar_t precomputed_scalarmul_adjustment = { {{
|
||||
SC_LIMB
|
||||
(0xc873d6d54a7bb0cf),
|
||||
SC_LIMB
|
||||
(0xe933d8d723a70aad),
|
||||
SC_LIMB
|
||||
(0xbb124b65129c96fd),
|
||||
SC_LIMB
|
||||
(0x00000008335dc163)
|
||||
}}
|
||||
static const curve448_scalar_t precomputed_scalarmul_adjustment = {
|
||||
{
|
||||
{
|
||||
SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad),
|
||||
SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES] = { 0x05 };
|
||||
|
@ -69,7 +66,7 @@ extern const gf curve448_precomputed_base_as_fe[];
|
|||
const curve448_precomputed_s *curve448_precomputed_base =
|
||||
(const curve448_precomputed_s *)&curve448_precomputed_base_as_fe;
|
||||
|
||||
/** Inverse. */
|
||||
/* Inverse. */
|
||||
static void gf_invert(gf y, const gf x, int assert_nonzero)
|
||||
{
|
||||
mask_t ret;
|
||||
|
@ -89,11 +86,11 @@ static void gf_invert(gf y, const gf x, int assert_nonzero)
|
|||
const curve448_point_t curve448_point_identity =
|
||||
{ {{{{0}}}, {{{1}}}, {{{1}}}, {{{0}}}} };
|
||||
|
||||
static void
|
||||
point_double_internal(curve448_point_t p,
|
||||
const curve448_point_t q, int before_double)
|
||||
static void point_double_internal(curve448_point_t p, const curve448_point_t q,
|
||||
int before_double)
|
||||
{
|
||||
gf a, b, c, d;
|
||||
|
||||
gf_sqr(c, q->x);
|
||||
gf_sqr(a, q->y);
|
||||
gf_add_nr(d, c, a); /* 2+e */
|
||||
|
@ -136,6 +133,7 @@ static void pt_to_pniels(pniels_t b, const curve448_point_t a)
|
|||
static void pniels_to_pt(curve448_point_t e, const pniels_t d)
|
||||
{
|
||||
gf eu;
|
||||
|
||||
gf_add(eu, d->n->b, d->n->a);
|
||||
gf_sub(e->y, d->n->b, d->n->a);
|
||||
gf_mul(e->t, e->y, eu);
|
||||
|
@ -152,10 +150,11 @@ static void niels_to_pt(curve448_point_t e, const niels_t n)
|
|||
gf_copy(e->z, ONE);
|
||||
}
|
||||
|
||||
static void
|
||||
add_niels_to_pt(curve448_point_t d, const niels_t e, int before_double)
|
||||
static void add_niels_to_pt(curve448_point_t d, const niels_t e,
|
||||
int before_double)
|
||||
{
|
||||
gf a, b, c;
|
||||
|
||||
gf_sub_nr(b, d->y, d->x); /* 3+e */
|
||||
gf_mul(a, e->a, b);
|
||||
gf_add_nr(b, d->x, d->y); /* 2+e */
|
||||
|
@ -172,8 +171,8 @@ add_niels_to_pt(curve448_point_t d, const niels_t e, int before_double)
|
|||
gf_mul(d->t, b, c);
|
||||
}
|
||||
|
||||
static void
|
||||
sub_niels_from_pt(curve448_point_t d, const niels_t e, int before_double)
|
||||
static void sub_niels_from_pt(curve448_point_t d, const niels_t e,
|
||||
int before_double)
|
||||
{
|
||||
gf a, b, c;
|
||||
gf_sub_nr(b, d->y, d->x); /* 3+e */
|
||||
|
@ -192,19 +191,21 @@ sub_niels_from_pt(curve448_point_t d, const niels_t e, int before_double)
|
|||
gf_mul(d->t, b, c);
|
||||
}
|
||||
|
||||
static void
|
||||
add_pniels_to_pt(curve448_point_t p, const pniels_t pn, int before_double)
|
||||
static void add_pniels_to_pt(curve448_point_t p, const pniels_t pn,
|
||||
int before_double)
|
||||
{
|
||||
gf L0;
|
||||
|
||||
gf_mul(L0, p->z, pn->z);
|
||||
gf_copy(p->z, L0);
|
||||
add_niels_to_pt(p, pn->n, before_double);
|
||||
}
|
||||
|
||||
static void
|
||||
sub_pniels_from_pt(curve448_point_t p, const pniels_t pn, int before_double)
|
||||
static void sub_pniels_from_pt(curve448_point_t p, const pniels_t pn,
|
||||
int before_double)
|
||||
{
|
||||
gf L0;
|
||||
|
||||
gf_mul(L0, p->z, pn->z);
|
||||
gf_copy(p->z, L0);
|
||||
sub_niels_from_pt(p, pn->n, before_double);
|
||||
|
@ -244,9 +245,9 @@ decaf_bool_t curve448_point_valid(const curve448_point_t p)
|
|||
return mask_to_bool(out);
|
||||
}
|
||||
|
||||
static ossl_inline void
|
||||
constant_time_lookup_niels(niels_s * __restrict__ ni,
|
||||
const niels_t * table, int nelts, int idx)
|
||||
static ossl_inline void constant_time_lookup_niels(niels_s * __restrict__ ni,
|
||||
const niels_t * table,
|
||||
int nelts, int idx)
|
||||
{
|
||||
constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
|
||||
}
|
||||
|
@ -300,10 +301,9 @@ void curve448_precomputed_scalarmul(curve448_point_t out,
|
|||
OPENSSL_cleanse(scalar1x, sizeof(scalar1x));
|
||||
}
|
||||
|
||||
void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
|
||||
enc
|
||||
[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const curve448_point_t p)
|
||||
void curve448_point_mul_by_ratio_and_encode_like_eddsa(
|
||||
uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const curve448_point_t p)
|
||||
{
|
||||
|
||||
/* The point is now on the twisted curve. Move it to untwisted. */
|
||||
|
@ -314,6 +314,7 @@ void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
|
|||
{
|
||||
/* 4-isogeny: 2xy/(y^2+x^2), (y^2-x^2)/(2z^2-y^2+x^2) */
|
||||
gf u;
|
||||
|
||||
gf_sqr(x, q->x);
|
||||
gf_sqr(t, q->y);
|
||||
gf_add(u, x, t);
|
||||
|
@ -347,12 +348,9 @@ void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
|
|||
curve448_point_destroy(q);
|
||||
}
|
||||
|
||||
decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
|
||||
p,
|
||||
const uint8_t
|
||||
enc
|
||||
[DECAF_EDDSA_448_PUBLIC_BYTES]
|
||||
)
|
||||
decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(
|
||||
curve448_point_t p,
|
||||
const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES])
|
||||
{
|
||||
uint8_t enc2[DECAF_EDDSA_448_PUBLIC_BYTES];
|
||||
mask_t low;
|
||||
|
@ -411,8 +409,7 @@ decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
|
|||
|
||||
decaf_error_t decaf_x448(uint8_t out[X_PUBLIC_BYTES],
|
||||
const uint8_t base[X_PUBLIC_BYTES],
|
||||
const uint8_t scalar[X_PRIVATE_BYTES]
|
||||
)
|
||||
const uint8_t scalar[X_PRIVATE_BYTES])
|
||||
{
|
||||
gf x1, x2, z2, x3, z3, t1, t2;
|
||||
int t;
|
||||
|
@ -487,8 +484,7 @@ decaf_error_t decaf_x448(uint8_t out[X_PUBLIC_BYTES],
|
|||
/* Thanks Johan Pascal */
|
||||
void decaf_ed448_convert_public_key_to_x448(uint8_t x[DECAF_X448_PUBLIC_BYTES],
|
||||
const uint8_t
|
||||
ed[DECAF_EDDSA_448_PUBLIC_BYTES]
|
||||
)
|
||||
ed[DECAF_EDDSA_448_PUBLIC_BYTES])
|
||||
{
|
||||
gf y;
|
||||
const uint8_t mask = (uint8_t)(0xFE << (7));
|
||||
|
@ -527,8 +523,7 @@ void curve448_point_mul_by_ratio_and_encode_like_x448(uint8_t
|
|||
}
|
||||
|
||||
void decaf_x448_derive_public_key(uint8_t out[X_PUBLIC_BYTES],
|
||||
const uint8_t scalar[X_PRIVATE_BYTES]
|
||||
)
|
||||
const uint8_t scalar[X_PRIVATE_BYTES])
|
||||
{
|
||||
/* Scalar conditioning */
|
||||
uint8_t scalar2[X_PRIVATE_BYTES];
|
||||
|
@ -553,17 +548,15 @@ void decaf_x448_derive_public_key(uint8_t out[X_PUBLIC_BYTES],
|
|||
curve448_point_destroy(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* @cond internal
|
||||
* Control for variable-time scalar multiply algorithms.
|
||||
*/
|
||||
/* Control for variable-time scalar multiply algorithms. */
|
||||
struct smvt_control {
|
||||
int power, addend;
|
||||
};
|
||||
|
||||
static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
|
||||
* 3] */
|
||||
const curve448_scalar_t scalar, unsigned int table_bits)
|
||||
static int recode_wnaf(struct smvt_control *control,
|
||||
/* [nbits/(table_bits + 1) + 3] */
|
||||
const curve448_scalar_t scalar,
|
||||
unsigned int table_bits)
|
||||
{
|
||||
unsigned int table_size = DECAF_448_SCALAR_BITS / (table_bits + 1) + 3;
|
||||
int position = table_size - 1; /* at the end */
|
||||
|
@ -587,16 +580,13 @@ static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
|
|||
for (w = 1; w < (DECAF_448_SCALAR_BITS - 1) / 16 + 3; w++) {
|
||||
if (w < (DECAF_448_SCALAR_BITS - 1) / 16 + 1) {
|
||||
/* Refill the 16 high bits of current */
|
||||
current +=
|
||||
(uint32_t)((scalar->limb[w / B_OVER_16] >> (16 *
|
||||
(w %
|
||||
B_OVER_16))) <<
|
||||
16);
|
||||
current += (uint32_t)((scalar->limb[w / B_OVER_16]
|
||||
>> (16 * (w % B_OVER_16))) << 16);
|
||||
}
|
||||
|
||||
while (current & 0xFFFF) {
|
||||
uint32_t pos = __builtin_ctz((uint32_t)current), odd =
|
||||
(uint32_t)current >> pos;
|
||||
uint32_t pos = __builtin_ctz((uint32_t)current);
|
||||
uint32_t odd = (uint32_t)current >> pos;
|
||||
int32_t delta = odd & mask;
|
||||
|
||||
assert(position >= 0);
|
||||
|
@ -619,9 +609,9 @@ static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits+1) +
|
|||
return n - 1;
|
||||
}
|
||||
|
||||
static void
|
||||
prepare_wnaf_table(pniels_t * output,
|
||||
const curve448_point_t working, unsigned int tbits)
|
||||
static void prepare_wnaf_table(pniels_t * output,
|
||||
const curve448_point_t working,
|
||||
unsigned int tbits)
|
||||
{
|
||||
curve448_point_t tmp;
|
||||
int i;
|
||||
|
@ -698,12 +688,12 @@ void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
|
|||
|
||||
if (control_var[contv].addend > 0) {
|
||||
add_pniels_to_pt(combo,
|
||||
precmp_var[control_var[contv].addend >> 1], i
|
||||
&& !cp);
|
||||
precmp_var[control_var[contv].addend >> 1],
|
||||
i && !cp);
|
||||
} else {
|
||||
sub_pniels_from_pt(combo,
|
||||
precmp_var[(-control_var[contv].addend) >>
|
||||
1], i && !cp);
|
||||
precmp_var[(-control_var[contv].addend)
|
||||
>> 1], i && !cp);
|
||||
}
|
||||
contv++;
|
||||
}
|
||||
|
@ -713,8 +703,8 @@ void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
|
|||
|
||||
if (control_pre[contp].addend > 0) {
|
||||
add_niels_to_pt(combo,
|
||||
curve448_wnaf_base[control_pre[contp].addend >>
|
||||
1], i);
|
||||
curve448_wnaf_base[control_pre[contp].addend
|
||||
>> 1], i);
|
||||
} else {
|
||||
sub_niels_from_pt(combo,
|
||||
curve448_wnaf_base[(-control_pre
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -27,48 +27,59 @@ extern "C" {
|
|||
* with arch_arm32.
|
||||
*/
|
||||
# ifndef DECAF_WORD_BITS
|
||||
# if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30))
|
||||
# define DECAF_WORD_BITS 64 /**< The number of bits in a word */
|
||||
# if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) \
|
||||
|| (((__UINT_FAST32_MAX__)>>30)>>30))
|
||||
# define DECAF_WORD_BITS 64 /* The number of bits in a word */
|
||||
# else
|
||||
# define DECAF_WORD_BITS 32 /**< The number of bits in a word */
|
||||
# define DECAF_WORD_BITS 32 /* The number of bits in a word */
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# if DECAF_WORD_BITS == 64
|
||||
typedef uint64_t decaf_word_t; /**< Word size for internal computations */
|
||||
typedef int64_t decaf_sword_t; /**< Signed word size for internal computations */
|
||||
typedef uint64_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
|
||||
typedef __uint128_t decaf_dword_t; /**< Double-word size for internal computations */
|
||||
typedef __int128_t decaf_dsword_t; /**< Signed double-word size for internal computations */
|
||||
# elif DECAF_WORD_BITS == 32 /**< The number of bits in a word */
|
||||
typedef uint32_t decaf_word_t; /**< Word size for internal computations */
|
||||
typedef int32_t decaf_sword_t; /**< Signed word size for internal computations */
|
||||
typedef uint32_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
|
||||
typedef uint64_t decaf_dword_t; /**< Double-word size for internal computations */
|
||||
typedef int64_t decaf_dsword_t; /**< Signed double-word size for internal computations */
|
||||
/* Word size for internal computations */
|
||||
typedef uint64_t decaf_word_t;
|
||||
/* Signed word size for internal computations */
|
||||
typedef int64_t decaf_sword_t;
|
||||
/* "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
|
||||
typedef uint64_t decaf_bool_t;
|
||||
/* Double-word size for internal computations */
|
||||
typedef __uint128_t decaf_dword_t;
|
||||
/* Signed double-word size for internal computations */
|
||||
typedef __int128_t decaf_dsword_t;
|
||||
# elif DECAF_WORD_BITS == 32
|
||||
/* Word size for internal computations */
|
||||
typedef uint32_t decaf_word_t;
|
||||
/* Signed word size for internal computations */
|
||||
typedef int32_t decaf_sword_t;
|
||||
/* "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
|
||||
typedef uint32_t decaf_bool_t;
|
||||
/* Double-word size for internal computations */
|
||||
typedef uint64_t decaf_dword_t;
|
||||
/* Signed double-word size for internal computations */
|
||||
typedef int64_t decaf_dsword_t;
|
||||
# else
|
||||
# error "Only supporting DECAF_WORD_BITS = 32 or 64 for now"
|
||||
# endif
|
||||
|
||||
/** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
|
||||
/* DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
|
||||
static const decaf_bool_t DECAF_TRUE = -(decaf_bool_t) 1;
|
||||
|
||||
/** DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
|
||||
/* DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
|
||||
static const decaf_bool_t DECAF_FALSE = 0;
|
||||
|
||||
/** Another boolean type used to indicate success or failure. */
|
||||
/* Another boolean type used to indicate success or failure. */
|
||||
typedef enum {
|
||||
DECAF_SUCCESS = -1, /**< The operation succeeded. */
|
||||
DECAF_FAILURE = 0 /**< The operation failed. */
|
||||
} decaf_error_t;
|
||||
|
||||
/** Return success if x is true */
|
||||
/* Return success if x is true */
|
||||
static ossl_inline decaf_error_t decaf_succeed_if(decaf_bool_t x)
|
||||
{
|
||||
return (decaf_error_t) x;
|
||||
}
|
||||
|
||||
/** Return DECAF_TRUE iff x == DECAF_SUCCESS */
|
||||
/* Return DECAF_TRUE iff e == DECAF_SUCCESS */
|
||||
static ossl_inline decaf_bool_t decaf_successful(decaf_error_t e)
|
||||
{
|
||||
decaf_dword_t w = ((decaf_word_t) e) ^ ((decaf_word_t) DECAF_SUCCESS);
|
||||
|
|
|
@ -19,111 +19,106 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Number of bytes in an EdDSA public key. */
|
||||
/* Number of bytes in an EdDSA public key. */
|
||||
# define DECAF_EDDSA_448_PUBLIC_BYTES 57
|
||||
|
||||
/** Number of bytes in an EdDSA private key. */
|
||||
/* Number of bytes in an EdDSA private key. */
|
||||
# define DECAF_EDDSA_448_PRIVATE_BYTES DECAF_EDDSA_448_PUBLIC_BYTES
|
||||
|
||||
/** Number of bytes in an EdDSA private key. */
|
||||
# define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + DECAF_EDDSA_448_PRIVATE_BYTES)
|
||||
/* Number of bytes in an EdDSA signature. */
|
||||
# define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + \
|
||||
DECAF_EDDSA_448_PRIVATE_BYTES)
|
||||
|
||||
/** Does EdDSA support non-contextual signatures? */
|
||||
/* Does EdDSA support non-contextual signatures? */
|
||||
# define DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS 0
|
||||
|
||||
/** EdDSA encoding ratio. */
|
||||
/* EdDSA encoding ratio. */
|
||||
# define DECAF_448_EDDSA_ENCODE_RATIO 4
|
||||
|
||||
/** EdDSA decoding ratio. */
|
||||
/* EdDSA decoding ratio. */
|
||||
# define DECAF_448_EDDSA_DECODE_RATIO (4 / 4)
|
||||
|
||||
/**
|
||||
* @brief EdDSA key generation. This function uses a different (non-Decaf)
|
||||
* encoding.
|
||||
/*
|
||||
* EdDSA key generation. This function uses a different (non-Decaf) encoding.
|
||||
*
|
||||
* @param [out] pubkey The public key.
|
||||
* @param [in] privkey The private key.
|
||||
* pubkey (out): The public key.
|
||||
* privkey (in): The private key.
|
||||
*/
|
||||
decaf_error_t decaf_ed448_derive_public_key(uint8_t
|
||||
pubkey
|
||||
[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t
|
||||
privkey
|
||||
[DECAF_EDDSA_448_PRIVATE_BYTES]
|
||||
);
|
||||
decaf_error_t decaf_ed448_derive_public_key(
|
||||
uint8_t pubkey [DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t privkey [DECAF_EDDSA_448_PRIVATE_BYTES]);
|
||||
|
||||
/**
|
||||
* @brief EdDSA signing.
|
||||
/*
|
||||
* EdDSA signing.
|
||||
*
|
||||
* @param [out] signature The signature.
|
||||
* @param [in] privkey The private key.
|
||||
* @param [in] pubkey The public key.
|
||||
* @param [in] message The message to sign.
|
||||
* @param [in] message_len The length of the message.
|
||||
* @param [in] prehashed Nonzero if the message is actually the hash of something you want to sign.
|
||||
* @param [in] context A "context" for this signature of up to 255 bytes.
|
||||
* @param [in] context_len Length of the context.
|
||||
* signature (out): The signature.
|
||||
* privkey (in): The private key.
|
||||
* pubkey (in): The public key.
|
||||
* message (in): The message to sign.
|
||||
* message_len (in): The length of the message.
|
||||
* prehashed (in): Nonzero if the message is actually the hash of something
|
||||
* you want to sign.
|
||||
* context (in): A "context" for this signature of up to 255 bytes.
|
||||
* context_len (in): Length of the context.
|
||||
*
|
||||
* @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
|
||||
* messages, at least without some very careful protocol-level disambiguation. For Ed448 it is
|
||||
* safe. The C++ wrapper is designed to make it harder to screw this up, but this C code gives
|
||||
* you no seat belt.
|
||||
* For Ed25519, it is unsafe to use the same key for both prehashed and
|
||||
* non-prehashed messages, at least without some very careful protocol-level
|
||||
* disambiguation. For Ed448 it is safe. The C++ wrapper is designed to make
|
||||
* it harder to screw this up, but this C code gives you no seat belt.
|
||||
*/
|
||||
decaf_error_t decaf_ed448_sign(uint8_t
|
||||
signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t
|
||||
privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
|
||||
const uint8_t
|
||||
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t *message, size_t message_len,
|
||||
uint8_t prehashed, const uint8_t *context,
|
||||
size_t context_len)
|
||||
__attribute__ ((nonnull(1, 2, 3)));
|
||||
decaf_error_t decaf_ed448_sign(
|
||||
uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
|
||||
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t *message, size_t message_len,
|
||||
uint8_t prehashed, const uint8_t *context,
|
||||
size_t context_len)
|
||||
__attribute__ ((nonnull(1, 2, 3)));
|
||||
|
||||
/**
|
||||
* @brief EdDSA signing with prehash.
|
||||
/*
|
||||
* EdDSA signing with prehash.
|
||||
*
|
||||
* @param [out] signature The signature.
|
||||
* @param [in] privkey The private key.
|
||||
* @param [in] pubkey The public key.
|
||||
* @param [in] hash The hash of the message. This object will not be modified by the call.
|
||||
* @param [in] context A "context" for this signature of up to 255 bytes. Must be the same as what was used for the prehash.
|
||||
* @param [in] context_len Length of the context.
|
||||
* signature (out): The signature.
|
||||
* privkey (in): The private key.
|
||||
* pubkey (in): The public key.
|
||||
* hash (in): The hash of the message. This object will not be modified by the
|
||||
* call.
|
||||
* context (in): A "context" for this signature of up to 255 bytes. Must be the
|
||||
* same as what was used for the prehash.
|
||||
* context_len (in): Length of the context.
|
||||
*
|
||||
* @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
|
||||
* messages, at least without some very careful protocol-level disambiguation. For Ed448 it is
|
||||
* safe. The C++ wrapper is designed to make it harder to screw this up, but this C code gives
|
||||
* you no seat belt.
|
||||
* For Ed25519, it is unsafe to use the same key for both prehashed and
|
||||
* non-prehashed messages, at least without some very careful protocol-level
|
||||
* disambiguation. For Ed448 it is safe. The C++ wrapper is designed to make
|
||||
* it harder to screw this up, but this C code gives you no seat belt.
|
||||
*/
|
||||
decaf_error_t decaf_ed448_sign_prehash(uint8_t
|
||||
signature
|
||||
[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t
|
||||
privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
|
||||
const uint8_t
|
||||
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t hash[64],
|
||||
const uint8_t *context,
|
||||
size_t context_len)
|
||||
__attribute__ ((nonnull(1, 2, 3, 4)));
|
||||
decaf_error_t decaf_ed448_sign_prehash(
|
||||
uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
|
||||
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t hash[64],
|
||||
const uint8_t *context,
|
||||
size_t context_len)
|
||||
__attribute__ ((nonnull(1, 2, 3, 4)));
|
||||
|
||||
/**
|
||||
* @brief EdDSA signature verification.
|
||||
/*
|
||||
* EdDSA signature verification.
|
||||
*
|
||||
* Uses the standard (i.e. less-strict) verification formula.
|
||||
*
|
||||
* @param [in] signature The signature.
|
||||
* @param [in] pubkey The public key.
|
||||
* @param [in] message The message to verify.
|
||||
* @param [in] message_len The length of the message.
|
||||
* @param [in] prehashed Nonzero if the message is actually the hash of something you want to verify.
|
||||
* @param [in] context A "context" for this signature of up to 255 bytes.
|
||||
* @param [in] context_len Length of the context.
|
||||
* signature (in): The signature.
|
||||
* pubkey (in): The public key.
|
||||
* message (in): The message to verify.
|
||||
* message_len (in): The length of the message.
|
||||
* prehashed (in): Nonzero if the message is actually the hash of something you
|
||||
* want to verify.
|
||||
* context (in): A "context" for this signature of up to 255 bytes.
|
||||
* context_len (in): Length of the context.
|
||||
*
|
||||
* @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
|
||||
* messages, at least without some very careful protocol-level disambiguation. For Ed448 it is
|
||||
* safe. The C++ wrapper is designed to make it harder to screw this up, but this C code gives
|
||||
* you no seat belt.
|
||||
* For Ed25519, it is unsafe to use the same key for both prehashed and
|
||||
* non-prehashed messages, at least without some very careful protocol-level
|
||||
* disambiguation. For Ed448 it is safe. The C++ wrapper is designed to make
|
||||
* it harder to screw this up, but this C code gives you no seat belt.
|
||||
*/
|
||||
decaf_error_t decaf_ed448_verify(const uint8_t
|
||||
signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
|
@ -132,36 +127,36 @@ decaf_error_t decaf_ed448_verify(const uint8_t
|
|||
const uint8_t *message, size_t message_len,
|
||||
uint8_t prehashed, const uint8_t *context,
|
||||
uint8_t context_len)
|
||||
__attribute__ ((nonnull(1, 2)));
|
||||
__attribute__ ((nonnull(1, 2)));
|
||||
|
||||
/**
|
||||
* @brief EdDSA signature verification.
|
||||
/*
|
||||
* EdDSA signature verification.
|
||||
*
|
||||
* Uses the standard (i.e. less-strict) verification formula.
|
||||
*
|
||||
* @param [in] signature The signature.
|
||||
* @param [in] pubkey The public key.
|
||||
* @param [in] hash The hash of the message. This object will not be modified by the call.
|
||||
* @param [in] context A "context" for this signature of up to 255 bytes. Must be the same as what was used for the prehash.
|
||||
* @param [in] context_len Length of the context.
|
||||
* signature (in): The signature.
|
||||
* pubkey (in): The public key.
|
||||
* hash (in): The hash of the message. This object will not be modified by the
|
||||
* call.
|
||||
* context (in): A "context" for this signature of up to 255 bytes. Must be the
|
||||
* same as what was used for the prehash.
|
||||
* context_len (in): Length of the context.
|
||||
*
|
||||
* @warning For Ed25519, it is unsafe to use the same key for both prehashed and non-prehashed
|
||||
* messages, at least without some very careful protocol-level disambiguation. For Ed448 it is
|
||||
* safe. The C++ wrapper is designed to make it harder to screw this up, but this C code gives
|
||||
* you no seat belt.
|
||||
* For Ed25519, it is unsafe to use the same key for both prehashed and
|
||||
* non-prehashed messages, at least without some very careful protocol-level
|
||||
* disambiguation. For Ed448 it is safe. The C++ wrapper is designed to make
|
||||
* it harder to screw this up, but this C code gives you no seat belt.
|
||||
*/
|
||||
decaf_error_t decaf_ed448_verify_prehash(const uint8_t
|
||||
signature
|
||||
[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t
|
||||
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t hash[64],
|
||||
const uint8_t *context,
|
||||
uint8_t context_len)
|
||||
__attribute__ ((nonnull(1, 2)));
|
||||
decaf_error_t decaf_ed448_verify_prehash(
|
||||
const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t hash[64],
|
||||
const uint8_t *context,
|
||||
uint8_t context_len)
|
||||
__attribute__ ((nonnull(1, 2)));
|
||||
|
||||
/**
|
||||
* @brief EdDSA point encoding. Used internally, exposed externally.
|
||||
/*
|
||||
* EdDSA point encoding. Used internally, exposed externally.
|
||||
* Multiplies by DECAF_448_EDDSA_ENCODE_RATIO first.
|
||||
*
|
||||
* The multiplication is required because the EdDSA encoding represents
|
||||
|
@ -181,62 +176,52 @@ decaf_error_t decaf_ed448_verify_prehash(const uint8_t
|
|||
* this function, you will get DECAF_448_EDDSA_ENCODE_RATIO times the
|
||||
* EdDSA base point.
|
||||
*
|
||||
* @param [out] enc The encoded point.
|
||||
* @param [in] p The point.
|
||||
* enc (out): The encoded point.
|
||||
* p (in): The point.
|
||||
*/
|
||||
void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
|
||||
enc
|
||||
[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const curve448_point_t
|
||||
p);
|
||||
void curve448_point_mul_by_ratio_and_encode_like_eddsa(
|
||||
uint8_t enc [DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const curve448_point_t p);
|
||||
|
||||
/**
|
||||
* @brief EdDSA point decoding. Multiplies by DECAF_448_EDDSA_DECODE_RATIO,
|
||||
* and ignores cofactor information.
|
||||
/*
|
||||
* EdDSA point decoding. Multiplies by DECAF_448_EDDSA_DECODE_RATIO, and
|
||||
* ignores cofactor information.
|
||||
*
|
||||
* See notes on curve448_point_mul_by_ratio_and_encode_like_eddsa
|
||||
*
|
||||
* @param [out] enc The encoded point.
|
||||
* @param [in] p The point.
|
||||
* enc (in): The encoded point.
|
||||
* p (out): The decoded point.
|
||||
*/
|
||||
decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
|
||||
p,
|
||||
const uint8_t
|
||||
enc
|
||||
[DECAF_EDDSA_448_PUBLIC_BYTES]
|
||||
);
|
||||
decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(
|
||||
curve448_point_t p,
|
||||
const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]);
|
||||
|
||||
/**
|
||||
* @brief EdDSA to ECDH public key conversion
|
||||
/*
|
||||
* EdDSA to ECDH public key conversion
|
||||
* Deserialize the point to get y on Edwards curve,
|
||||
* Convert it to u coordinate on Montgomery curve.
|
||||
*
|
||||
* @warning This function does not check that the public key being converted
|
||||
* is a valid EdDSA public key (FUTURE?)
|
||||
* This function does not check that the public key being converted is a valid
|
||||
* EdDSA public key (FUTURE?)
|
||||
*
|
||||
* @param[out] x The ECDH public key as in RFC7748(point on Montgomery curve)
|
||||
* @param[in] ed The EdDSA public key(point on Edwards curve)
|
||||
* x (out): The ECDH public key as in RFC 7748 (point on Montgomery curve)
|
||||
* ed (in): The EdDSA public key (point on Edwards curve)
|
||||
*/
|
||||
void decaf_ed448_convert_public_key_to_x448(uint8_t x[DECAF_X448_PUBLIC_BYTES],
|
||||
const uint8_t
|
||||
ed[DECAF_EDDSA_448_PUBLIC_BYTES]
|
||||
);
|
||||
void decaf_ed448_convert_public_key_to_x448(
|
||||
uint8_t x[DECAF_X448_PUBLIC_BYTES],
|
||||
const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]);
|
||||
|
||||
/**
|
||||
* @brief EdDSA to ECDH private key conversion
|
||||
/*
|
||||
* EdDSA to ECDH private key conversion
|
||||
* Using the appropriate hash function, hash the EdDSA private key
|
||||
* and keep only the lower bytes to get the ECDH private key
|
||||
*
|
||||
* @param[out] x The ECDH private key as in RFC7748
|
||||
* @param[in] ed The EdDSA private key
|
||||
* x (out): The ECDH private key as in RFC7748
|
||||
* ed (in): The EdDSA private key
|
||||
*/
|
||||
decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
|
||||
x
|
||||
[DECAF_X448_PRIVATE_BYTES],
|
||||
const uint8_t
|
||||
ed
|
||||
[DECAF_EDDSA_448_PRIVATE_BYTES]
|
||||
);
|
||||
decaf_error_t decaf_ed448_convert_private_key_to_x448(
|
||||
uint8_t x[DECAF_X448_PRIVATE_BYTES],
|
||||
const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
|
|
@ -55,8 +55,7 @@ static decaf_error_t oneshot_hash(uint8_t *out, size_t outlen,
|
|||
return DECAF_SUCCESS;
|
||||
}
|
||||
|
||||
static void clamp(uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES]
|
||||
)
|
||||
static void clamp(uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES])
|
||||
{
|
||||
uint8_t hibit = (1 << 0) >> 1;
|
||||
|
||||
|
@ -106,13 +105,9 @@ static decaf_error_t hash_init_with_dom(EVP_MD_CTX *hashctx,
|
|||
}
|
||||
|
||||
/* In this file because it uses the hash */
|
||||
decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
|
||||
x
|
||||
[DECAF_X448_PRIVATE_BYTES],
|
||||
const uint8_t
|
||||
ed
|
||||
[DECAF_EDDSA_448_PRIVATE_BYTES]
|
||||
)
|
||||
decaf_error_t decaf_ed448_convert_private_key_to_x448(
|
||||
uint8_t x[DECAF_X448_PRIVATE_BYTES],
|
||||
const uint8_t ed [DECAF_EDDSA_448_PRIVATE_BYTES])
|
||||
{
|
||||
/* pass the private key through oneshot_hash function */
|
||||
/* and keep the first DECAF_X448_PRIVATE_BYTES bytes */
|
||||
|
@ -121,13 +116,9 @@ decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
|
|||
ed, DECAF_EDDSA_448_PRIVATE_BYTES);
|
||||
}
|
||||
|
||||
decaf_error_t decaf_ed448_derive_public_key(uint8_t
|
||||
pubkey
|
||||
[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t
|
||||
privkey
|
||||
[DECAF_EDDSA_448_PRIVATE_BYTES]
|
||||
)
|
||||
decaf_error_t decaf_ed448_derive_public_key(
|
||||
uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES])
|
||||
{
|
||||
/* only this much used for keygen */
|
||||
uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
|
||||
|
@ -136,9 +127,9 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
|
|||
curve448_point_t p;
|
||||
|
||||
if (!oneshot_hash(secret_scalar_ser, sizeof(secret_scalar_ser), privkey,
|
||||
DECAF_EDDSA_448_PRIVATE_BYTES)) {
|
||||
DECAF_EDDSA_448_PRIVATE_BYTES))
|
||||
return DECAF_FAILURE;
|
||||
}
|
||||
|
||||
clamp(secret_scalar_ser);
|
||||
|
||||
curve448_scalar_decode_long(secret_scalar, secret_scalar_ser,
|
||||
|
@ -152,9 +143,8 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
|
|||
* converted it effectively picks up a factor of 2 from the isogenies. So
|
||||
* we might start at 2 instead of 1.
|
||||
*/
|
||||
for (c = 1; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
|
||||
for (c = 1; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1)
|
||||
curve448_scalar_halve(secret_scalar, secret_scalar);
|
||||
}
|
||||
|
||||
curve448_precomputed_scalarmul(p, curve448_precomputed_base, secret_scalar);
|
||||
|
||||
|
@ -168,15 +158,13 @@ decaf_error_t decaf_ed448_derive_public_key(uint8_t
|
|||
return DECAF_SUCCESS;
|
||||
}
|
||||
|
||||
decaf_error_t decaf_ed448_sign(uint8_t
|
||||
signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t
|
||||
privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
|
||||
const uint8_t
|
||||
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t *message, size_t message_len,
|
||||
uint8_t prehashed, const uint8_t *context,
|
||||
size_t context_len)
|
||||
decaf_error_t decaf_ed448_sign(
|
||||
uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
|
||||
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t *message, size_t message_len,
|
||||
uint8_t prehashed, const uint8_t *context,
|
||||
size_t context_len)
|
||||
{
|
||||
curve448_scalar_t secret_scalar;
|
||||
EVP_MD_CTX *hashctx = EVP_MD_CTX_new();
|
||||
|
@ -287,9 +275,6 @@ decaf_error_t decaf_ed448_sign_prehash(uint8_t
|
|||
{
|
||||
return decaf_ed448_sign(signature, privkey, pubkey, hash, 64, 1, context,
|
||||
context_len);
|
||||
/*
|
||||
* OPENSSL_cleanse(hash,sizeof(hash));
|
||||
*/
|
||||
}
|
||||
|
||||
decaf_error_t decaf_ed448_verify(const uint8_t
|
||||
|
@ -307,15 +292,13 @@ decaf_error_t decaf_ed448_verify(const uint8_t
|
|||
curve448_scalar_t response_scalar;
|
||||
unsigned int c;
|
||||
|
||||
if (DECAF_SUCCESS != error) {
|
||||
if (DECAF_SUCCESS != error)
|
||||
return error;
|
||||
}
|
||||
|
||||
error =
|
||||
curve448_point_decode_like_eddsa_and_mul_by_ratio(r_point, signature);
|
||||
if (DECAF_SUCCESS != error) {
|
||||
if (DECAF_SUCCESS != error)
|
||||
return error;
|
||||
}
|
||||
|
||||
{
|
||||
/* Compute the challenge */
|
||||
|
@ -345,9 +328,8 @@ decaf_error_t decaf_ed448_verify(const uint8_t
|
|||
&signature[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
DECAF_EDDSA_448_PRIVATE_BYTES);
|
||||
|
||||
for (c = 1; c < DECAF_448_EDDSA_DECODE_RATIO; c <<= 1) {
|
||||
for (c = 1; c < DECAF_448_EDDSA_DECODE_RATIO; c <<= 1)
|
||||
curve448_scalar_add(response_scalar, response_scalar, response_scalar);
|
||||
}
|
||||
|
||||
/* pk_point = -c(x(P)) + (cx + k)G = kG */
|
||||
curve448_base_double_scalarmul_non_secret(pk_point,
|
||||
|
@ -356,20 +338,16 @@ decaf_error_t decaf_ed448_verify(const uint8_t
|
|||
return decaf_succeed_if(curve448_point_eq(pk_point, r_point));
|
||||
}
|
||||
|
||||
decaf_error_t decaf_ed448_verify_prehash(const uint8_t
|
||||
signature
|
||||
[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t
|
||||
pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t hash[64],
|
||||
const uint8_t *context,
|
||||
uint8_t context_len)
|
||||
decaf_error_t decaf_ed448_verify_prehash(
|
||||
const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
|
||||
const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
|
||||
const uint8_t hash[64], const uint8_t *context,
|
||||
uint8_t context_len)
|
||||
{
|
||||
decaf_error_t ret;
|
||||
|
||||
ret =
|
||||
decaf_ed448_verify(signature, pubkey, hash, 64, 1, context,
|
||||
context_len);
|
||||
ret = decaf_ed448_verify(signature, pubkey, hash, 64, 1, context,
|
||||
context_len);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
*/
|
||||
#include "field.h"
|
||||
|
||||
static const gf MODULUS =
|
||||
{ FIELD_LITERAL(0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff,
|
||||
0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff,
|
||||
0xffffffffffffff, 0xffffffffffffff)
|
||||
static const gf MODULUS = {
|
||||
FIELD_LITERAL(0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff,
|
||||
0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff,
|
||||
0xffffffffffffff, 0xffffffffffffff)
|
||||
};
|
||||
|
||||
/** Serialize to wire format. */
|
||||
|
@ -27,9 +27,8 @@ void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit)
|
|||
|
||||
gf_copy(red, x);
|
||||
gf_strong_reduce(red);
|
||||
if (!with_hibit) {
|
||||
if (!with_hibit)
|
||||
assert(gf_hibit(red) == 0);
|
||||
}
|
||||
|
||||
UNROLL for (i = 0; i < (with_hibit ? X_SER_BYTES : SER_BYTES); i++) {
|
||||
if (fill < 8 && j < NLIMBS) {
|
||||
|
@ -43,7 +42,7 @@ void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit)
|
|||
}
|
||||
}
|
||||
|
||||
/** Return high bit of x = low bit of 2x mod p */
|
||||
/* Return high bit of x = low bit of 2x mod p */
|
||||
mask_t gf_hibit(const gf x)
|
||||
{
|
||||
gf y;
|
||||
|
@ -52,7 +51,7 @@ mask_t gf_hibit(const gf x)
|
|||
return -(y->limb[0] & 1);
|
||||
}
|
||||
|
||||
/** Return high bit of x = low bit of 2x mod p */
|
||||
/* Return low bit of x */
|
||||
mask_t gf_lobit(const gf x)
|
||||
{
|
||||
gf y;
|
||||
|
@ -61,7 +60,7 @@ mask_t gf_lobit(const gf x)
|
|||
return -(y->limb[0] & 1);
|
||||
}
|
||||
|
||||
/** Deserialize from wire format; return -1 on success and 0 on failure. */
|
||||
/* Deserialize from wire format; return -1 on success and 0 on failure. */
|
||||
mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
|
||||
uint8_t hi_nmask)
|
||||
{
|
||||
|
@ -93,7 +92,7 @@ mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
|
|||
return succ & word_is_zero(buffer) & ~word_is_zero(scarry);
|
||||
}
|
||||
|
||||
/** Reduce to canonical form. */
|
||||
/* Reduce to canonical form. */
|
||||
void gf_strong_reduce(gf a)
|
||||
{
|
||||
dsword_t scarry;
|
||||
|
@ -135,7 +134,7 @@ void gf_strong_reduce(gf a)
|
|||
assert(word_is_zero(carry + scarry_0));
|
||||
}
|
||||
|
||||
/** Subtract two gf elements d=a-b */
|
||||
/* Subtract two gf elements d=a-b */
|
||||
void gf_sub(gf d, const gf a, const gf b)
|
||||
{
|
||||
gf_sub_RAW(d, a, b);
|
||||
|
@ -143,14 +142,14 @@ void gf_sub(gf d, const gf a, const gf b)
|
|||
gf_weak_reduce(d);
|
||||
}
|
||||
|
||||
/** Add two field elements d = a+b */
|
||||
/* Add two field elements d = a+b */
|
||||
void gf_add(gf d, const gf a, const gf b)
|
||||
{
|
||||
gf_add_RAW(d, a, b);
|
||||
gf_weak_reduce(d);
|
||||
}
|
||||
|
||||
/** Compare a==b */
|
||||
/* Compare a==b */
|
||||
mask_t gf_eq(const gf a, const gf b)
|
||||
{
|
||||
gf c;
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
# include "f_field.h"
|
||||
# include <string.h>
|
||||
|
||||
/** Square x, n times. */
|
||||
/* Square x, n times. */
|
||||
static ossl_inline void gf_sqrn(gf_s * __restrict__ y, const gf x, int n)
|
||||
{
|
||||
gf tmp;
|
||||
|
@ -38,7 +38,7 @@ static ossl_inline void gf_sqrn(gf_s * __restrict__ y, const gf x, int n)
|
|||
|
||||
# define gf_add_nr gf_add_RAW
|
||||
|
||||
/** Subtract mod p. Bias by 2 and don't reduce */
|
||||
/* Subtract mod p. Bias by 2 and don't reduce */
|
||||
static ossl_inline void gf_sub_nr(gf c, const gf a, const gf b)
|
||||
{
|
||||
gf_sub_RAW(c, a, b);
|
||||
|
@ -47,7 +47,7 @@ static ossl_inline void gf_sub_nr(gf c, const gf a, const gf b)
|
|||
gf_weak_reduce(c);
|
||||
}
|
||||
|
||||
/** Subtract mod p. Bias by amt but don't reduce. */
|
||||
/* Subtract mod p. Bias by amt but don't reduce. */
|
||||
static ossl_inline void gf_subx_nr(gf c, const gf a, const gf b, int amt)
|
||||
{
|
||||
gf_sub_RAW(c, a, b);
|
||||
|
@ -56,7 +56,7 @@ static ossl_inline void gf_subx_nr(gf c, const gf a, const gf b, int amt)
|
|||
gf_weak_reduce(c);
|
||||
}
|
||||
|
||||
/** Mul by signed int. Not constant-time WRT the sign of that int. */
|
||||
/* Mul by signed int. Not constant-time WRT the sign of that int. */
|
||||
static ossl_inline void gf_mulw(gf c, const gf a, int32_t w)
|
||||
{
|
||||
if (w > 0) {
|
||||
|
@ -67,13 +67,13 @@ static ossl_inline void gf_mulw(gf c, const gf a, int32_t w)
|
|||
}
|
||||
}
|
||||
|
||||
/** Constant time, x = is_z ? z : y */
|
||||
/* Constant time, x = is_z ? z : y */
|
||||
static ossl_inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z)
|
||||
{
|
||||
constant_time_select(x, y, z, sizeof(gf), is_z, 0);
|
||||
}
|
||||
|
||||
/** Constant time, if (neg) x=-x; */
|
||||
/* Constant time, if (neg) x=-x; */
|
||||
static ossl_inline void gf_cond_neg(gf x, mask_t neg)
|
||||
{
|
||||
gf y;
|
||||
|
@ -81,7 +81,7 @@ static ossl_inline void gf_cond_neg(gf x, mask_t neg)
|
|||
gf_cond_sel(x, x, y, neg);
|
||||
}
|
||||
|
||||
/** Constant time, if (swap) (x,y) = (y,x); */
|
||||
/* Constant time, if (swap) (x,y) = (y,x); */
|
||||
static ossl_inline void gf_cond_swap(gf x, gf_s * __restrict__ y, mask_t swap)
|
||||
{
|
||||
constant_time_cond_swap(x, y, sizeof(gf_s), swap);
|
||||
|
|
|
@ -20,148 +20,146 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** @cond internal */
|
||||
# define DECAF_448_SCALAR_LIMBS ((446-1)/DECAF_WORD_BITS+1)
|
||||
/** @endcond */
|
||||
|
||||
/** The number of bits in a scalar */
|
||||
/* The number of bits in a scalar */
|
||||
# define DECAF_448_SCALAR_BITS 446
|
||||
|
||||
/** Number of bytes in a serialized point. */
|
||||
/* Number of bytes in a serialized point. */
|
||||
# define DECAF_448_SER_BYTES 56
|
||||
|
||||
/** Number of bytes in an elligated point. For now set the same as SER_BYTES
|
||||
/*
|
||||
* Number of bytes in an elligated point. For now set the same as SER_BYTES
|
||||
* but could be different for other curves.
|
||||
*/
|
||||
# define DECAF_448_HASH_BYTES 56
|
||||
|
||||
/** Number of bytes in a serialized scalar. */
|
||||
/* Number of bytes in a serialized scalar. */
|
||||
# define DECAF_448_SCALAR_BYTES 56
|
||||
|
||||
/** Number of bits in the "which" field of an elligator inverse */
|
||||
/* Number of bits in the "which" field of an elligator inverse */
|
||||
# define DECAF_448_INVERT_ELLIGATOR_WHICH_BITS 3
|
||||
|
||||
/** The cofactor the curve would have, if we hadn't removed it */
|
||||
/* The cofactor the curve would have, if we hadn't removed it */
|
||||
# define DECAF_448_REMOVED_COFACTOR 4
|
||||
|
||||
/** X448 encoding ratio. */
|
||||
/* X448 encoding ratio. */
|
||||
# define DECAF_X448_ENCODE_RATIO 2
|
||||
|
||||
/** Number of bytes in an x448 public key */
|
||||
/* Number of bytes in an x448 public key */
|
||||
# define DECAF_X448_PUBLIC_BYTES 56
|
||||
|
||||
/** Number of bytes in an x448 private key */
|
||||
/* Number of bytes in an x448 private key */
|
||||
# define DECAF_X448_PRIVATE_BYTES 56
|
||||
|
||||
/** Twisted Edwards extended homogeneous coordinates */
|
||||
/* Twisted Edwards extended homogeneous coordinates */
|
||||
typedef struct curve448_point_s {
|
||||
/** @cond internal */
|
||||
gf_448_t x, y, z, t;
|
||||
/** @endcond */
|
||||
} curve448_point_t[1];
|
||||
|
||||
/** Precomputed table based on a point. Can be trivial implementation. */
|
||||
/* Precomputed table based on a point. Can be trivial implementation. */
|
||||
struct curve448_precomputed_s;
|
||||
|
||||
/** Precomputed table based on a point. Can be trivial implementation. */
|
||||
/* Precomputed table based on a point. Can be trivial implementation. */
|
||||
typedef struct curve448_precomputed_s curve448_precomputed_s;
|
||||
|
||||
/** Scalar is stored packed, because we don't need the speed. */
|
||||
/* Scalar is stored packed, because we don't need the speed. */
|
||||
typedef struct curve448_scalar_s {
|
||||
/** @cond internal */
|
||||
decaf_word_t limb[DECAF_448_SCALAR_LIMBS];
|
||||
/** @endcond */
|
||||
} curve448_scalar_t[1];
|
||||
|
||||
/** A scalar equal to 1. */
|
||||
/* A scalar equal to 1. */
|
||||
extern const curve448_scalar_t curve448_scalar_one;
|
||||
|
||||
/** A scalar equal to 0. */
|
||||
/* A scalar equal to 0. */
|
||||
extern const curve448_scalar_t curve448_scalar_zero;
|
||||
|
||||
/** The identity point on the curve. */
|
||||
/* The identity point on the curve. */
|
||||
extern const curve448_point_t curve448_point_identity;
|
||||
|
||||
/** An arbitrarily chosen base point on the curve. */
|
||||
/* An arbitrarily chosen base point on the curve. */
|
||||
extern const curve448_point_t curve448_point_base;
|
||||
|
||||
/** Precomputed table for the base point on the curve. */
|
||||
/* Precomputed table for the base point on the curve. */
|
||||
extern const struct curve448_precomputed_s *curve448_precomputed_base;
|
||||
|
||||
/**
|
||||
* @brief Read a scalar from wire format or from bytes.
|
||||
/*
|
||||
* Read a scalar from wire format or from bytes.
|
||||
*
|
||||
* @param [in] ser Serialized form of a scalar.
|
||||
* @param [out] out Deserialized form.
|
||||
* ser (in): Serialized form of a scalar.
|
||||
* out (out): Deserialized form.
|
||||
*
|
||||
* @retval DECAF_SUCCESS The scalar was correctly encoded.
|
||||
* @retval DECAF_FAILURE The scalar was greater than the modulus,
|
||||
* and has been reduced modulo that modulus.
|
||||
* Returns:
|
||||
* DECAF_SUCCESS: The scalar was correctly encoded.
|
||||
* DECAF_FAILURE: The scalar was greater than the modulus, and has been reduced
|
||||
* modulo that modulus.
|
||||
*/
|
||||
__owur decaf_error_t curve448_scalar_decode(curve448_scalar_t out,
|
||||
const unsigned char
|
||||
ser[DECAF_448_SCALAR_BYTES]
|
||||
);
|
||||
__owur decaf_error_t curve448_scalar_decode(
|
||||
curve448_scalar_t out,
|
||||
const unsigned char ser[DECAF_448_SCALAR_BYTES]);
|
||||
|
||||
/**
|
||||
* @brief Read a scalar from wire format or from bytes. Reduces mod
|
||||
* scalar prime.
|
||||
/*
|
||||
* Read a scalar from wire format or from bytes. Reduces mod scalar prime.
|
||||
*
|
||||
* @param [in] ser Serialized form of a scalar.
|
||||
* @param [in] ser_len Length of serialized form.
|
||||
* @param [out] out Deserialized form.
|
||||
* ser (in): Serialized form of a scalar.
|
||||
* ser_len (in): Length of serialized form.
|
||||
* out (out): Deserialized form.
|
||||
*/
|
||||
void curve448_scalar_decode_long(curve448_scalar_t out,
|
||||
const unsigned char *ser, size_t ser_len);
|
||||
|
||||
/**
|
||||
* @brief Serialize a scalar to wire format.
|
||||
/*
|
||||
* Serialize a scalar to wire format.
|
||||
*
|
||||
* @param [out] ser Serialized form of a scalar.
|
||||
* @param [in] s Deserialized scalar.
|
||||
* ser (out): Serialized form of a scalar.
|
||||
* s (in): Deserialized scalar.
|
||||
*/
|
||||
void curve448_scalar_encode(unsigned char ser[DECAF_448_SCALAR_BYTES],
|
||||
const curve448_scalar_t s);
|
||||
|
||||
/**
|
||||
* @brief Add two scalars. The scalars may use the same memory.
|
||||
* @param [in] a One scalar.
|
||||
* @param [in] b Another scalar.
|
||||
* @param [out] out a+b.
|
||||
/*
|
||||
* Add two scalars. The scalars may use the same memory.
|
||||
*
|
||||
* a (in): One scalar.
|
||||
* b (in): Another scalar.
|
||||
* out (out): a+b.
|
||||
*/
|
||||
void curve448_scalar_add(curve448_scalar_t out,
|
||||
const curve448_scalar_t a, const curve448_scalar_t b);
|
||||
|
||||
/**
|
||||
* @brief Subtract two scalars. The scalars may use the same memory.
|
||||
* @param [in] a One scalar.
|
||||
* @param [in] b Another scalar.
|
||||
* @param [out] out a-b.
|
||||
/*
|
||||
* Subtract two scalars. The scalars may use the same memory.
|
||||
* a (in): One scalar.
|
||||
* b (in): Another scalar.
|
||||
* out (out): a-b.
|
||||
*/
|
||||
void curve448_scalar_sub(curve448_scalar_t out,
|
||||
const curve448_scalar_t a, const curve448_scalar_t b);
|
||||
|
||||
/**
|
||||
* @brief Multiply two scalars. The scalars may use the same memory.
|
||||
* @param [in] a One scalar.
|
||||
* @param [in] b Another scalar.
|
||||
* @param [out] out a*b.
|
||||
/*
|
||||
* Multiply two scalars. The scalars may use the same memory.
|
||||
*
|
||||
* a (in): One scalar.
|
||||
* b (in): Another scalar.
|
||||
* out (out): a*b.
|
||||
*/
|
||||
void curve448_scalar_mul(curve448_scalar_t out,
|
||||
const curve448_scalar_t a, const curve448_scalar_t b);
|
||||
|
||||
/**
|
||||
* @brief Halve a scalar. The scalars may use the same memory.
|
||||
* @param [in] a A scalar.
|
||||
* @param [out] out a/2.
|
||||
/*
|
||||
* Halve a scalar. The scalars may use the same memory.
|
||||
*
|
||||
* a (in): A scalar.
|
||||
* out (out): a/2.
|
||||
*/
|
||||
void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a);
|
||||
|
||||
/**
|
||||
* @brief Copy a scalar. The scalars may use the same memory, in which
|
||||
* case this function does nothing.
|
||||
* @param [in] a A scalar.
|
||||
* @param [out] out Will become a copy of a.
|
||||
/*
|
||||
* Copy a scalar. The scalars may use the same memory, in which case this
|
||||
* function does nothing.
|
||||
*
|
||||
* a (in): A scalar.
|
||||
* out (out): Will become a copy of a.
|
||||
*/
|
||||
static ossl_inline void curve448_scalar_copy(curve448_scalar_t out,
|
||||
const curve448_scalar_t a)
|
||||
|
@ -169,12 +167,12 @@ static ossl_inline void curve448_scalar_copy(curve448_scalar_t out,
|
|||
*out = *a;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Copy a point. The input and output may alias,
|
||||
* in which case this function does nothing.
|
||||
/*
|
||||
* Copy a point. The input and output may alias, in which case this function
|
||||
* does nothing.
|
||||
*
|
||||
* @param [out] a A copy of the point.
|
||||
* @param [in] b Any point.
|
||||
* a (out): A copy of the point.
|
||||
* b (in): Any point.
|
||||
*/
|
||||
static ossl_inline void curve448_point_copy(curve448_point_t a,
|
||||
const curve448_point_t b)
|
||||
|
@ -182,47 +180,48 @@ static ossl_inline void curve448_point_copy(curve448_point_t a,
|
|||
*a = *b;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Test whether two points are equal. If yes, return
|
||||
* DECAF_TRUE, else return DECAF_FALSE.
|
||||
/*
|
||||
* Test whether two points are equal. If yes, return DECAF_TRUE, else return
|
||||
* DECAF_FALSE.
|
||||
*
|
||||
* @param [in] a A point.
|
||||
* @param [in] b Another point.
|
||||
* @retval DECAF_TRUE The points are equal.
|
||||
* @retval DECAF_FALSE The points are not equal.
|
||||
* a (in): A point.
|
||||
* b (in): Another point.
|
||||
*
|
||||
* Returns:
|
||||
* DECAF_TRUE: The points are equal.
|
||||
* DECAF_FALSE: The points are not equal.
|
||||
*/
|
||||
__owur decaf_bool_t curve448_point_eq(const curve448_point_t a,
|
||||
const curve448_point_t b);
|
||||
|
||||
/**
|
||||
* @brief Double a point. Equivalent to
|
||||
* curve448_point_add(two_a,a,a), but potentially faster.
|
||||
/*
|
||||
* Double a point. Equivalent to curve448_point_add(two_a,a,a), but potentially
|
||||
* faster.
|
||||
*
|
||||
* @param [out] two_a The sum a+a.
|
||||
* @param [in] a A point.
|
||||
* two_a (out): The sum a+a.
|
||||
* a (in): A point.
|
||||
*/
|
||||
void curve448_point_double(curve448_point_t two_a, const curve448_point_t a);
|
||||
|
||||
/**
|
||||
* @brief RFC 7748 Diffie-Hellman scalarmul. This function uses a different
|
||||
/*
|
||||
* RFC 7748 Diffie-Hellman scalarmul. This function uses a different
|
||||
* (non-Decaf) encoding.
|
||||
*
|
||||
* @param [out] scaled The scaled point base*scalar
|
||||
* @param [in] base The point to be scaled.
|
||||
* @param [in] scalar The scalar to multiply by.
|
||||
* out (out): The scaled point base*scalar
|
||||
* base (in): The point to be scaled.
|
||||
* scalar (in): The scalar to multiply by.
|
||||
*
|
||||
* @retval DECAF_SUCCESS The scalarmul succeeded.
|
||||
* @retval DECAF_FAILURE The scalarmul didn't succeed, because the base
|
||||
* point is in a small subgroup.
|
||||
* Returns:
|
||||
* DECAF_SUCCESS: The scalarmul succeeded.
|
||||
* DECAF_FAILURE: The scalarmul didn't succeed, because the base point is in a
|
||||
* small subgroup.
|
||||
*/
|
||||
__owur decaf_error_t decaf_x448(uint8_t out[DECAF_X448_PUBLIC_BYTES],
|
||||
const uint8_t base[DECAF_X448_PUBLIC_BYTES],
|
||||
const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
|
||||
);
|
||||
const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]);
|
||||
|
||||
/**
|
||||
* @brief Multiply a point by DECAF_X448_ENCODE_RATIO,
|
||||
* then encode it like RFC 7748.
|
||||
/*
|
||||
* Multiply a point by DECAF_X448_ENCODE_RATIO, then encode it like RFC 7748.
|
||||
*
|
||||
* This function is mainly used internally, but is exported in case
|
||||
* it will be useful.
|
||||
|
@ -237,83 +236,73 @@ __owur decaf_error_t decaf_x448(uint8_t out[DECAF_X448_PUBLIC_BYTES],
|
|||
* will be DECAF_X448_ENCODE_RATIO times the X448
|
||||
* base point.
|
||||
*
|
||||
* @param [out] out The scaled and encoded point.
|
||||
* @param [in] p The point to be scaled and encoded.
|
||||
* out (out): The scaled and encoded point.
|
||||
* p (in): The point to be scaled and encoded.
|
||||
*/
|
||||
void curve448_point_mul_by_ratio_and_encode_like_x448(uint8_t
|
||||
out
|
||||
[DECAF_X448_PUBLIC_BYTES],
|
||||
const curve448_point_t p);
|
||||
void curve448_point_mul_by_ratio_and_encode_like_x448(
|
||||
uint8_t out[DECAF_X448_PUBLIC_BYTES],
|
||||
const curve448_point_t p);
|
||||
|
||||
/** The base point for X448 Diffie-Hellman */
|
||||
/* The base point for X448 Diffie-Hellman */
|
||||
extern const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES];
|
||||
|
||||
/**
|
||||
* @brief RFC 7748 Diffie-Hellman base point scalarmul. This function uses
|
||||
* a different (non-Decaf) encoding.
|
||||
*
|
||||
* Does exactly the same thing as decaf_x448_generate_key,
|
||||
* but has a better name.
|
||||
*
|
||||
* @param [out] scaled The scaled point base*scalar
|
||||
* @param [in] scalar The scalar to multiply by.
|
||||
/*
|
||||
* RFC 7748 Diffie-Hellman base point scalarmul. This function uses a different
|
||||
* (non-Decaf) encoding.
|
||||
*
|
||||
* out (out): The scaled point base*scalar
|
||||
* scalar (in): The scalar to multiply by.
|
||||
*/
|
||||
void decaf_x448_derive_public_key(uint8_t out[DECAF_X448_PUBLIC_BYTES],
|
||||
const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
|
||||
);
|
||||
void decaf_x448_derive_public_key(
|
||||
uint8_t out[DECAF_X448_PUBLIC_BYTES],
|
||||
const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]);
|
||||
|
||||
/**
|
||||
* @brief Multiply a precomputed base point by a scalar:
|
||||
* scaled = scalar*base.
|
||||
* Some implementations do not include precomputed points; for
|
||||
* those implementations, this function is the same as
|
||||
* curve448_point_scalarmul
|
||||
/*
|
||||
* Multiply a precomputed base point by a scalar: scaled = scalar*base.
|
||||
*
|
||||
* @param [out] scaled The scaled point base*scalar
|
||||
* @param [in] base The point to be scaled.
|
||||
* @param [in] scalar The scalar to multiply by.
|
||||
* scaled (out): The scaled point base*scalar
|
||||
* base (in): The point to be scaled.
|
||||
* scalar (in): The scalar to multiply by.
|
||||
*/
|
||||
void curve448_precomputed_scalarmul(curve448_point_t scaled,
|
||||
const curve448_precomputed_s * base,
|
||||
const curve448_scalar_t scalar);
|
||||
|
||||
/**
|
||||
* @brief Multiply two base points by two scalars:
|
||||
* scaled = scalar1*curve448_point_base + scalar2*base2.
|
||||
/*
|
||||
* Multiply two base points by two scalars:
|
||||
* combo = scalar1*curve448_point_base + scalar2*base2.
|
||||
*
|
||||
* Otherwise equivalent to curve448_point_double_scalarmul, but may be
|
||||
* faster at the expense of being variable time.
|
||||
*
|
||||
* @param [out] combo The linear combination scalar1*base + scalar2*base2.
|
||||
* @param [in] scalar1 A first scalar to multiply by.
|
||||
* @param [in] base2 A second point to be scaled.
|
||||
* @param [in] scalar2 A second scalar to multiply by.
|
||||
* combo (out): The linear combination scalar1*base + scalar2*base2.
|
||||
* scalar1 (in): A first scalar to multiply by.
|
||||
* base2 (in): A second point to be scaled.
|
||||
* scalar2 (in): A second scalar to multiply by.
|
||||
*
|
||||
* @warning: This function takes variable time, and may leak the scalars
|
||||
* used. It is designed for signature verification.
|
||||
* Warning: This function takes variable time, and may leak the scalars used.
|
||||
* It is designed for signature verification.
|
||||
*/
|
||||
void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
|
||||
const curve448_scalar_t scalar1,
|
||||
const curve448_point_t base2,
|
||||
const curve448_scalar_t scalar2);
|
||||
|
||||
/**
|
||||
* @brief Test that a point is valid, for debugging purposes.
|
||||
/*
|
||||
* Test that a point is valid, for debugging purposes.
|
||||
*
|
||||
* @param [in] to_test The point to test.
|
||||
* @retval DECAF_TRUE The point is valid.
|
||||
* @retval DECAF_FALSE The point is invalid.
|
||||
* to_test (in): The point to test.
|
||||
*
|
||||
* Returns:
|
||||
* DECAF_TRUE: The point is valid.
|
||||
* DECAF_FALSE: The point is invalid.
|
||||
*/
|
||||
__owur decaf_bool_t curve448_point_valid(const curve448_point_t to_test);
|
||||
|
||||
/**
|
||||
* @brief Overwrite scalar with zeros.
|
||||
*/
|
||||
/* Overwrite scalar with zeros. */
|
||||
void curve448_scalar_destroy(curve448_scalar_t scalar);
|
||||
|
||||
/**
|
||||
* @brief Overwrite point with zeros.
|
||||
*/
|
||||
/* Overwrite point with zeros. */
|
||||
void curve448_point_destroy(curve448_point_t point);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -16,31 +16,36 @@
|
|||
#include "point_448.h"
|
||||
|
||||
static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t) 0x3bd440fae918bc5;
|
||||
static const curve448_scalar_t sc_p = { {{
|
||||
SC_LIMB(0x2378c292ab5844f3),
|
||||
SC_LIMB(0x216cc2728dc58f55),
|
||||
SC_LIMB(0xc44edb49aed63690),
|
||||
SC_LIMB(0xffffffff7cca23e9),
|
||||
SC_LIMB(0xffffffffffffffff),
|
||||
SC_LIMB(0xffffffffffffffff),
|
||||
SC_LIMB(0x3fffffffffffffff)
|
||||
}}
|
||||
}, sc_r2 = { { {
|
||||
static const curve448_scalar_t sc_p = {
|
||||
{
|
||||
{
|
||||
SC_LIMB(0x2378c292ab5844f3), SC_LIMB(0x216cc2728dc58f55),
|
||||
SC_LIMB(0xc44edb49aed63690), SC_LIMB(0xffffffff7cca23e9),
|
||||
SC_LIMB(0xffffffffffffffff), SC_LIMB(0xffffffffffffffff),
|
||||
SC_LIMB(0x3fffffffffffffff)
|
||||
}
|
||||
}
|
||||
}, sc_r2 = {
|
||||
{
|
||||
{
|
||||
|
||||
SC_LIMB(0xe3539257049b9b60), SC_LIMB(0x7af32c4bc1b195d9),
|
||||
SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838),
|
||||
SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af),
|
||||
SC_LIMB(0x3402a939f823b729)
|
||||
}}};
|
||||
SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838),
|
||||
SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af),
|
||||
SC_LIMB(0x3402a939f823b729)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* End of template stuff */
|
||||
|
||||
#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
|
||||
|
||||
/*
 * The scalars 1 and 0.  Only the first limb is written explicitly; the
 * remaining limbs are zero-initialised by the usual C aggregate rules.
 */
const curve448_scalar_t curve448_scalar_one = {{{1}}};
const curve448_scalar_t curve448_scalar_zero = {{{0}}};
|
||||
|
||||
/** {extra,accum} - sub +? p
|
||||
/*
|
||||
* {extra,accum} - sub +? p
|
||||
* Must have extra <= 1
|
||||
*/
|
||||
static void sc_subx(curve448_scalar_t out,
|
||||
|
@ -67,8 +72,8 @@ static void sc_subx(curve448_scalar_t out,
|
|||
}
|
||||
}
|
||||
|
||||
static void sc_montmul(curve448_scalar_t out,
|
||||
const curve448_scalar_t a, const curve448_scalar_t b)
|
||||
static void sc_montmul(curve448_scalar_t out, const curve448_scalar_t a,
|
||||
const curve448_scalar_t b)
|
||||
{
|
||||
unsigned int i, j;
|
||||
decaf_word_t accum[DECAF_448_SCALAR_LIMBS + 1] = { 0 };
|
||||
|
@ -104,24 +109,25 @@ static void sc_montmul(curve448_scalar_t out,
|
|||
sc_subx(out, accum, sc_p, sc_p, hi_carry);
|
||||
}
|
||||
|
||||
/*
 * Multiply two scalars: out = a * b.
 *
 * Implemented as two sc_montmul() passes: the first multiplies a by b, the
 * second multiplies that intermediate result by the constant sc_r2.
 * NOTE(review): the second pass presumably cancels the Montgomery factor
 * introduced by the first - confirm against sc_montmul().
 *
 * out may alias a or b only to the extent sc_montmul() permits; the second
 * call already passes out as both destination and first operand.
 */
void curve448_scalar_mul(curve448_scalar_t out, const curve448_scalar_t a,
                         const curve448_scalar_t b)
{
    sc_montmul(out, a, b);
    sc_montmul(out, out, sc_r2);
}
|
||||
|
||||
/*
 * Subtract two scalars: out = a - b.
 *
 * Delegates to sc_subx() with a's limbs as the accumulator, b as the
 * subtrahend, sc_p as the modulus and 0 as the "extra" high word, so the
 * conditional "+? p" correction described on sc_subx() applies.
 */
void curve448_scalar_sub(curve448_scalar_t out, const curve448_scalar_t a,
                         const curve448_scalar_t b)
{
    sc_subx(out, a->limb, b, sc_p, 0);
}
|
||||
|
||||
void curve448_scalar_add(curve448_scalar_t out,
|
||||
const curve448_scalar_t a, const curve448_scalar_t b)
|
||||
void curve448_scalar_add(curve448_scalar_t out, const curve448_scalar_t a,
|
||||
const curve448_scalar_t b)
|
||||
{
|
||||
decaf_dword_t chain = 0;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
|
||||
chain = (chain + a->limb[i]) + b->limb[i];
|
||||
out->limb[i] = chain;
|
||||
|
@ -135,27 +141,26 @@ static ossl_inline void scalar_decode_short(curve448_scalar_t s,
|
|||
unsigned int nbytes)
|
||||
{
|
||||
unsigned int i, j, k = 0;
|
||||
|
||||
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
|
||||
decaf_word_t out = 0;
|
||||
for (j = 0; j < sizeof(decaf_word_t) && k < nbytes; j++, k++) {
|
||||
|
||||
for (j = 0; j < sizeof(decaf_word_t) && k < nbytes; j++, k++)
|
||||
out |= ((decaf_word_t) ser[k]) << (8 * j);
|
||||
}
|
||||
s->limb[i] = out;
|
||||
}
|
||||
}
|
||||
|
||||
decaf_error_t curve448_scalar_decode(curve448_scalar_t s,
|
||||
const unsigned char
|
||||
ser[DECAF_448_SCALAR_BYTES]
|
||||
)
|
||||
decaf_error_t curve448_scalar_decode(
|
||||
curve448_scalar_t s,
|
||||
const unsigned char ser[DECAF_448_SCALAR_BYTES])
|
||||
{
|
||||
unsigned int i;
|
||||
decaf_dsword_t accum = 0;
|
||||
|
||||
scalar_decode_short(s, ser, DECAF_448_SCALAR_BYTES);
|
||||
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
|
||||
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++)
|
||||
accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
|
||||
}
|
||||
/* Here accum == 0 or -1 */
|
||||
|
||||
curve448_scalar_mul(s, s, curve448_scalar_one); /* ham-handed reduce */
|
||||
|
@ -209,10 +214,10 @@ void curve448_scalar_encode(unsigned char ser[DECAF_448_SCALAR_BYTES],
|
|||
const curve448_scalar_t s)
|
||||
{
|
||||
unsigned int i, j, k = 0;
|
||||
|
||||
for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
|
||||
for (j = 0; j < sizeof(decaf_word_t); j++, k++) {
|
||||
for (j = 0; j < sizeof(decaf_word_t); j++, k++)
|
||||
ser[k] = s->limb[i] >> (8 * j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -226,8 +231,7 @@ void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a)
|
|||
out->limb[i] = chain;
|
||||
chain >>= DECAF_WORD_BITS;
|
||||
}
|
||||
for (i = 0; i < DECAF_448_SCALAR_LIMBS - 1; i++) {
|
||||
for (i = 0; i < DECAF_448_SCALAR_LIMBS - 1; i++)
|
||||
out->limb[i] = out->limb[i] >> 1 | out->limb[i + 1] << (WBITS - 1);
|
||||
}
|
||||
out->limb[i] = out->limb[i] >> 1 | chain << (WBITS - 1);
|
||||
}
|
||||
|
|
|
@ -162,7 +162,6 @@ static ossl_inline big_register_t br_is_zero(big_register_t x)
|
|||
/*
 * SSE2 variant: compare x with an all-zero vector, 32 bits at a time.
 * Each 32-bit lane of the result is all ones where the corresponding lane
 * of x is zero, and zero otherwise.
 */
static ossl_inline big_register_t br_is_zero(big_register_t x)
{
    return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128());
}
|
||||
# elif defined(__ARM_NEON__)
|
||||
static ossl_inline big_register_t br_is_zero(big_register_t x)
|
||||
|
@ -196,7 +195,7 @@ static ossl_inline big_register_t br_is_zero(big_register_t x)
|
|||
*/
|
||||
static ossl_inline decaf_bool_t mask_to_bool(mask_t m)
{
    /*
     * Convert via the signed word types before returning as decaf_bool_t.
     * NOTE(review): presumably so an all-ones mask sign-extends to an
     * all-ones decaf_bool_t when the types differ in width - confirm.
     */
    return (decaf_sword_t)(sword_t)m;
}
|
||||
|
||||
static ossl_inline mask_t bool_to_mask(decaf_bool_t m)
|
||||
|
@ -204,13 +203,13 @@ static ossl_inline mask_t bool_to_mask(decaf_bool_t m)
|
|||
/* On most arches this will be optimized to a simple cast. */
|
||||
mask_t ret = 0;
|
||||
unsigned int i;
|
||||
|
||||
unsigned int limit = sizeof(decaf_bool_t) / sizeof(mask_t);
|
||||
|
||||
if (limit < 1)
|
||||
limit = 1;
|
||||
for (i = 0; i < limit; i++) {
|
||||
for (i = 0; i < limit; i++)
|
||||
ret |= ~word_is_zero(m >> (i * 8 * sizeof(word_t)));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue